Spaces:
Sleeping
Sleeping
Upload folder using huggingface_hub
Browse files- .gitattributes +2 -0
- README.md +2 -9
- app.py +194 -0
- best_per_variety_bar.png +0 -0
- confusion_matrices_side_by_side.png +3 -0
- eval_outputs/all_runs.csv +217 -0
- eval_outputs/best_adapter_per_variety.json +20 -0
- eval_outputs/confusion_matrices/best_on_en-AU.png +0 -0
- eval_outputs/confusion_matrices/best_on_en-IN.png +0 -0
- eval_outputs/confusion_matrices/best_on_en-UK.png +0 -0
- eval_outputs/errors_for_q4/README.md +16 -0
- eval_outputs/errors_for_q4/SmolLM2-1.7B_en-UK_seed2024_misclassified.csv +48 -0
- eval_outputs/matrices/Llama-3.2-1B_mean.csv +4 -0
- eval_outputs/matrices/Llama-3.2-1B_std.csv +4 -0
- eval_outputs/matrices/OLMo-2-0425-1B_mean.csv +4 -0
- eval_outputs/matrices/OLMo-2-0425-1B_std.csv +4 -0
- eval_outputs/matrices/Qwen2.5-1.5B_mean.csv +4 -0
- eval_outputs/matrices/Qwen2.5-1.5B_std.csv +4 -0
- eval_outputs/matrices/SmolLM2-1.7B_mean.csv +4 -0
- eval_outputs/matrices/SmolLM2-1.7B_std.csv +4 -0
- headline_heatmaps.png +3 -0
- main_q2_3.ipynb +0 -0
- q5_2_inference.py +37 -0
- requirements.txt +5 -0
.gitattributes
CHANGED
|
@@ -33,3 +33,5 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
|
| 33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
|
|
|
|
| 33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
| 36 |
+
confusion_matrices_side_by_side.png filter=lfs diff=lfs merge=lfs -text
|
| 37 |
+
headline_heatmaps.png filter=lfs diff=lfs merge=lfs -text
|
README.md
CHANGED
|
@@ -1,13 +1,6 @@
|
|
| 1 |
---
|
| 2 |
-
title:
|
| 3 |
-
|
| 4 |
-
colorFrom: yellow
|
| 5 |
-
colorTo: yellow
|
| 6 |
sdk: gradio
|
| 7 |
sdk_version: 6.14.0
|
| 8 |
-
python_version: '3.13'
|
| 9 |
-
app_file: app.py
|
| 10 |
-
pinned: false
|
| 11 |
---
|
| 12 |
-
|
| 13 |
-
Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
|
|
|
|
| 1 |
---
|
| 2 |
+
title: nlp_sarcasm_detector
|
| 3 |
+
app_file: app.py
|
|
|
|
|
|
|
| 4 |
sdk: gradio
|
| 5 |
sdk_version: 6.14.0
|
|
|
|
|
|
|
|
|
|
| 6 |
---
|
|
|
|
|
|
app.py
ADDED
|
@@ -0,0 +1,194 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Q5.1 Deployment: Sarcasm Detector for Global English Varieties.
|
| 3 |
+
|
| 4 |
+
Gradio web app that routes each request to the LoRA adapter trained on the
|
| 5 |
+
user-selected English variety (en-UK / en-AU / en-IN). Each adapter is loaded
|
| 6 |
+
once and cached in memory for all subsequent requests.
|
| 7 |
+
|
| 8 |
+
Run:
|
| 9 |
+
python app.py # local only
|
| 10 |
+
python app.py --share # public Gradio share link
|
| 11 |
+
"""
|
| 12 |
+
|
| 13 |
+
import argparse
|
| 14 |
+
import torch
|
| 15 |
+
import torch.nn.functional as F
|
| 16 |
+
import gradio as gr
|
| 17 |
+
from transformers import AutoModelForSequenceClassification, AutoTokenizer
|
| 18 |
+
from peft import PeftModel
|
| 19 |
+
|
| 20 |
+
|
| 21 |
+
# ---------------------------------------------------------------------------
|
| 22 |
+
# Adapter registry
|
| 23 |
+
# Each variety maps to the frozen base LLM and its variety-specific LoRA adapter.
|
| 24 |
+
# ---------------------------------------------------------------------------
|
| 25 |
+
# Registry keyed by the label shown in the UI. Each value is the pair
# (base model repo id, LoRA adapter repo id) that load_model() consumes.
VARIETY_CONFIG: dict[str, tuple[str, str]] = {
    "British English (en-UK)": ("Qwen/Qwen2.5-1.5B", "berkinksk/besstie-sarcasm-en-UK-Qwen2.5-1.5B-seed2024"),
    "Australian English (en-AU)": ("allenai/OLMo-2-0425-1B", "berkinksk/besstie-sarcasm-en-AU-OLMo-2-0425-1B-seed2024"),
    "Indian English (en-IN)": ("Qwen/Qwen2.5-1.5B", "berkinksk/besstie-sarcasm-en-IN-Qwen2.5-1.5B-seed2024"),
}

# Run on the GPU when one is visible to torch, otherwise fall back to the CPU.
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"

# Process-wide memo of already-loaded models: variety label -> (tokenizer, model).
_cache: dict[str, tuple] = dict()
|
| 44 |
+
|
| 45 |
+
|
| 46 |
+
# ---------------------------------------------------------------------------
|
| 47 |
+
# Model loading
|
| 48 |
+
# ---------------------------------------------------------------------------
|
| 49 |
+
|
| 50 |
+
def load_model(variety: str) -> tuple:
    """Return the ``(tokenizer, model)`` pair for *variety*, building it on first use.

    The pair is stored in the module-level ``_cache`` under the variety label,
    so later calls with the same label return the stored pair without loading
    anything again.

    Args:
        variety: A key of ``VARIETY_CONFIG`` (the label selected in the UI).

    Returns:
        A 2-tuple of the base model's tokenizer and the PEFT-wrapped
        sequence-classification model, in eval mode and moved to ``DEVICE``.

    Raises:
        KeyError: If *variety* is not a key of ``VARIETY_CONFIG``.
    """
    cached_pair = _cache.get(variety)
    if cached_pair is not None:
        return cached_pair

    base_model_id, adapter_id = VARIETY_CONFIG[variety]

    tokenizer = AutoTokenizer.from_pretrained(base_model_id)
    if tokenizer.pad_token is None:
        # No dedicated pad token on this tokenizer: reuse EOS for padding.
        tokenizer.pad_token = tokenizer.eos_token

    # Two-label classification head on top of the base model, in bfloat16.
    backbone = AutoModelForSequenceClassification.from_pretrained(
        base_model_id, num_labels=2, dtype=torch.bfloat16
    )
    # Keep the model config's pad id in sync with the tokenizer's.
    backbone.config.pad_token_id = tokenizer.pad_token_id

    # Attach the variety-specific adapter, then prepare for inference.
    adapted = PeftModel.from_pretrained(backbone, adapter_id)
    adapted.eval()
    adapted.to(DEVICE)

    loaded_pair = (tokenizer, adapted)
    _cache[variety] = loaded_pair
    return loaded_pair
|
| 74 |
+
|
| 75 |
+
|
| 76 |
+
# ---------------------------------------------------------------------------
|
| 77 |
+
# Inference
|
| 78 |
+
# ---------------------------------------------------------------------------
|
| 79 |
+
|
| 80 |
+
@torch.no_grad()
def classify(text: str, variety: str) -> tuple[dict, str]:
    """Classify *text* as sarcastic or not with the model for *variety*.

    Args:
        text: Raw user input. ``None``, empty and whitespace-only input are
            all treated as "no text" and short-circuit before any model load.
        variety: A key of ``VARIETY_CONFIG``; passed through to ``load_model``.

    Returns:
        A 2-tuple ``(confidence_dict, result_markdown)``:

        * ``confidence_dict`` -- ``{"Sarcastic": float, "Not Sarcastic": float}``
          with probabilities rounded to 4 decimals (both ``0.0`` for empty input).
        * ``result_markdown`` -- human-readable result string.
    """
    # `text or ""` keeps a missing value (None) from crashing on .strip().
    text = (text or "").strip()
    if not text:
        return {"Sarcastic": 0.0, "Not Sarcastic": 0.0}, "⚠️ Please enter some text."

    tok, model = load_model(variety)

    inputs = tok(
        text,
        return_tensors="pt",
        truncation=True,
        max_length=256,
        padding=True,
    )
    inputs = {k: v.to(DEVICE) for k, v in inputs.items()}

    logits = model(**inputs).logits  # one row per input, one column per label

    # The model is loaded in bfloat16; softmax in that dtype keeps only ~2-3
    # significant digits, which makes the 4-decimal scores below meaningless.
    # Upcast to float32 first, then take the single batch row explicitly.
    probs = F.softmax(logits.float(), dim=-1)[0].tolist()
    # Assumes label index 0 = not sarcastic and 1 = sarcastic, as the original
    # code did -- TODO confirm against the label map used in training.
    p_not, p_sarc = probs[0], probs[1]

    label = "Sarcastic" if p_sarc >= 0.5 else "Not Sarcastic"
    confidence = max(p_sarc, p_not) * 100

    confidence_dict = {
        "Sarcastic": round(p_sarc, 4),
        "Not Sarcastic": round(p_not, 4),
    }
    result_md = f"### Prediction: **{label}**\nConfidence: {confidence:.1f}%"

    return confidence_dict, result_md
|
| 116 |
+
|
| 117 |
+
|
| 118 |
+
# ---------------------------------------------------------------------------
|
| 119 |
+
# Gradio UI
|
| 120 |
+
# ---------------------------------------------------------------------------
|
| 121 |
+
|
| 122 |
+
# One clickable sample per variety: [input text, variety label].
EXAMPLE_TEXTS = [
    [
        "Oh great, another rainy day in Manchester. Just what I needed.",
        "British English (en-UK)",
    ],
    [
        "Yeah nah, best coffee I've ever had. Absolutely ripper.",
        "Australian English (en-AU)",
    ],
    [
        "Only took 3 hours to get help. Very efficient service.",
        "Indian English (en-IN)",
    ],
]

with gr.Blocks(title="BESSTIE Sarcasm Detector") as demo:
    # Page header.
    gr.Markdown(
        """
# Sarcasm Detector for Global English Varieties
Powered by LoRA adapters fine-tuned on the
[BESSTIE dataset](https://huggingface.co/datasets/surrey-nlp/BESSTIE-CW-26).
Select your English variety — the backend automatically switches to the adapter
trained on that dialect.
"""
    )

    with gr.Row():
        # Left column: inputs and the trigger button.
        with gr.Column(scale=2):
            text_input = gr.Textbox(
                label="Input Text",
                placeholder='e.g. "Oh great, another rainy day in Manchester. Just what I needed."',
                lines=4,
            )
            variety_radio = gr.Radio(
                choices=list(VARIETY_CONFIG.keys()),
                value="British English (en-UK)",
                label="English Variety",
                info="Loads the LoRA adapter fine-tuned specifically on that variety.",
            )
            submit_btn = gr.Button("Detect Sarcasm", variant="primary")

        # Right column: per-class scores plus the formatted verdict.
        with gr.Column(scale=1):
            label_output = gr.Label(label="Confidence Scores", num_top_classes=2)
            text_output = gr.Markdown(label="Result")

    gr.Examples(
        examples=EXAMPLE_TEXTS,
        inputs=[text_input, variety_radio],
        label="Example inputs",
    )

    # NOTE(review): the last sentence of this footer says en-UK and en-IN only
    # swap adapter weights. load_model() caches per variety label and builds a
    # separate base model for each, so the text does not match the code --
    # confirm which one is intended.
    gr.Markdown(
        """
---
**First request per variety** downloads the base model + adapter (~1–3 GB each) and may
take a minute. Subsequent requests for the same variety are fast — the model stays cached
in memory. For the report: en-UK and en-IN share the same base model (Qwen2.5-1.5B),
so swapping between them only requires switching the small adapter weights rather than
reloading the full model.
"""
    )

    # The button click and pressing Enter in the textbox run the same handler
    # with the same inputs and outputs.
    for register_event in (submit_btn.click, text_input.submit):
        register_event(
            fn=classify,
            inputs=[text_input, variety_radio],
            outputs=[label_output, text_output],
        )
|
| 186 |
+
|
| 187 |
+
|
| 188 |
+
if __name__ == "__main__":
    # Command line: --share asks for a public link, --port picks the server port.
    cli = argparse.ArgumentParser()
    cli.add_argument(
        "--share",
        action="store_true",
        help="Create a public Gradio share link",
    )
    cli.add_argument("--port", default=7860, type=int)
    options = cli.parse_args()

    launch_options = {
        "share": options.share,
        "server_port": options.port,
        "theme": gr.themes.Soft(),
    }
    demo.launch(**launch_options)
|
best_per_variety_bar.png
ADDED
|
confusion_matrices_side_by_side.png
ADDED
|
Git LFS Details
|
eval_outputs/all_runs.csv
ADDED
|
@@ -0,0 +1,217 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
base_short,train_var,test_var,split,seed,macro_f1,precision_macro,recall_macro,f1_sarcastic,f1_not_sarcastic,n
|
| 2 |
+
Llama-3.2-1B,en-AU,en-UK,val,123,0.5533169533169533,0.5653735632183908,0.6834677419354839,0.2702702702702703,0.8363636363636363,101
|
| 3 |
+
Llama-3.2-1B,en-AU,en-UK,test,123,0.5927467977157418,0.6005681818181818,0.8097022542358053,0.3443223443223443,0.8411712511091393,700
|
| 4 |
+
Llama-3.2-1B,en-AU,en-AU,val,123,0.7214076246334311,0.7149122807017544,0.7481343283582089,0.6363636363636364,0.8064516129032258,95
|
| 5 |
+
Llama-3.2-1B,en-AU,en-AU,test,123,0.7350779286073403,0.7268688871518179,0.7577992980631744,0.6488888888888888,0.8212669683257918,667
|
| 6 |
+
Llama-3.2-1B,en-AU,en-IN,val,123,0.5583830512601973,0.5735191637630662,0.7419724770642202,0.27906976744186046,0.837696335078534,117
|
| 7 |
+
Llama-3.2-1B,en-AU,en-IN,test,123,0.5062048884874104,0.5532893550132708,0.6943609022556391,0.22905027932960895,0.783359497645212,816
|
| 8 |
+
Llama-3.2-1B,en-AU,en-UK,val,2024,0.5868807756400546,0.5987076648841355,0.7977150537634409,0.34146341463414637,0.8322981366459627,101
|
| 9 |
+
Llama-3.2-1B,en-AU,en-UK,test,2024,0.6102711332322677,0.6097408861906665,0.8284097868245313,0.366412213740458,0.8541300527240774,700
|
| 10 |
+
Llama-3.2-1B,en-AU,en-AU,val,2024,0.7564102564102564,0.7490079365079365,0.767590618336887,0.6666666666666666,0.8461538461538461,95
|
| 11 |
+
Llama-3.2-1B,en-AU,en-AU,test,2024,0.762626106331621,0.7538793103448276,0.7775412712855843,0.677570093457944,0.847682119205298,667
|
| 12 |
+
Llama-3.2-1B,en-AU,en-IN,val,2024,0.5066964285714286,0.534727143869596,0.6123853211009174,0.19047619047619047,0.8229166666666666,117
|
| 13 |
+
Llama-3.2-1B,en-AU,en-IN,test,2024,0.5101406627037843,0.5458601496503401,0.6585526315789474,0.2153846153846154,0.8048967100229534,816
|
| 14 |
+
Llama-3.2-1B,en-AU,en-UK,val,42,0.5605756358768408,0.5687377690802348,0.6888440860215054,0.2777777777777778,0.8433734939759037,101
|
| 15 |
+
Llama-3.2-1B,en-AU,en-UK,test,42,0.5960450890624399,0.6046783625730994,0.8262517861829635,0.35251798561151076,0.839572192513369,700
|
| 16 |
+
Llama-3.2-1B,en-AU,en-AU,val,42,0.7366431451612903,0.7285438765670202,0.7526652452025586,0.6451612903225806,0.828125,95
|
| 17 |
+
Llama-3.2-1B,en-AU,en-AU,test,42,0.7350779286073403,0.7268688871518179,0.7577992980631744,0.6488888888888888,0.8212669683257918,667
|
| 18 |
+
Llama-3.2-1B,en-AU,en-IN,val,42,0.5167391304347826,0.5580952380952381,0.7098623853211009,0.24,0.7934782608695652,117
|
| 19 |
+
Llama-3.2-1B,en-AU,en-IN,test,42,0.5080886906973863,0.5660918826247919,0.75,0.2512820512820513,0.7648953301127214,816
|
| 20 |
+
Llama-3.2-1B,en-IN,en-UK,val,123,0.47668393782383417,0.46,0.4946236559139785,0.0,0.9533678756476683,101
|
| 21 |
+
Llama-3.2-1B,en-IN,en-UK,test,123,0.6025642349103446,0.6926470588235294,0.5764048875798314,0.2465753424657534,0.9585531273549359,700
|
| 22 |
+
Llama-3.2-1B,en-IN,en-AU,val,123,0.4363132911392405,0.4766483516483517,0.4954690831556503,0.0625,0.810126582278481,95
|
| 23 |
+
Llama-3.2-1B,en-IN,en-AU,test,123,0.5232012432012432,0.6930173207262991,0.5499750855756316,0.21367521367521367,0.8327272727272728,667
|
| 24 |
+
Llama-3.2-1B,en-IN,en-IN,val,123,0.7720779220779221,0.8153153153153153,0.7408256880733946,0.5714285714285714,0.9727272727272728,117
|
| 25 |
+
Llama-3.2-1B,en-IN,en-IN,test,123,0.5949336622642077,0.6374045801526718,0.5761278195488722,0.23255813953488372,0.9573091849935317,816
|
| 26 |
+
Llama-3.2-1B,en-IN,en-UK,val,2024,0.5319073083778967,0.5341945288753799,0.530241935483871,0.13333333333333333,0.93048128342246,101
|
| 27 |
+
Llama-3.2-1B,en-IN,en-UK,test,2024,0.6933442097948579,0.7006802721088435,0.6866816365810271,0.43137254901960786,0.9553158705701078,700
|
| 28 |
+
Llama-3.2-1B,en-IN,en-AU,val,2024,0.42684766214177977,0.43164794007490637,0.48054371002132196,0.058823529411764705,0.7948717948717948,95
|
| 29 |
+
Llama-3.2-1B,en-IN,en-AU,test,2024,0.49425488480606594,0.55623973727422,0.5215184800034663,0.18110236220472442,0.8074074074074075,667
|
| 30 |
+
Llama-3.2-1B,en-IN,en-IN,val,2024,0.5553319919517102,0.5480769230769231,0.5745412844036697,0.19047619047619047,0.92018779342723,117
|
| 31 |
+
Llama-3.2-1B,en-IN,en-IN,test,2024,0.6286003812926163,0.6064425770308124,0.6821428571428572,0.3291139240506329,0.9280868385345997,816
|
| 32 |
+
Llama-3.2-1B,en-IN,en-UK,val,42,0.6039215686274509,0.6109422492401215,0.5981182795698925,0.26666666666666666,0.9411764705882353,101
|
| 33 |
+
Llama-3.2-1B,en-IN,en-UK,test,42,0.6379196840026333,0.6495335029686174,0.6285322679420255,0.32653061224489793,0.9493087557603687,700
|
| 34 |
+
Llama-3.2-1B,en-IN,en-AU,val,42,0.4221198156682028,0.41801948051948057,0.47308102345415776,0.05714285714285714,0.7870967741935484,95
|
| 35 |
+
Llama-3.2-1B,en-IN,en-AU,test,42,0.5224372603536317,0.6084271855486173,0.542132458078773,0.22745098039215686,0.8174235403151066,667
|
| 36 |
+
Llama-3.2-1B,en-IN,en-IN,val,42,0.607645875251509,0.5913461538461539,0.6416284403669725,0.2857142857142857,0.9295774647887324,117
|
| 37 |
+
Llama-3.2-1B,en-IN,en-IN,test,42,0.657258064516129,0.6334915084915085,0.7009398496240602,0.375,0.9395161290322581,816
|
| 38 |
+
Llama-3.2-1B,en-UK,en-UK,val,123,0.7122507122507122,0.7739583333333333,0.676747311827957,0.46153846153846156,0.9629629629629629,101
|
| 39 |
+
Llama-3.2-1B,en-UK,en-UK,test,123,0.7233313167977414,0.7510131712259371,0.7022979790615613,0.4842105263157895,0.9624521072796934,700
|
| 40 |
+
Llama-3.2-1B,en-UK,en-AU,val,123,0.4803732303732304,0.4947089947089947,0.4968017057569296,0.19047619047619047,0.7702702702702703,95
|
| 41 |
+
Llama-3.2-1B,en-UK,en-AU,test,123,0.5423959004228389,0.615546218487395,0.5536201741843234,0.26865671641791045,0.8161350844277674,667
|
| 42 |
+
Llama-3.2-1B,en-UK,en-IN,val,123,0.6990740740740741,0.6813084112149532,0.7224770642201834,0.4444444444444444,0.9537037037037037,117
|
| 43 |
+
Llama-3.2-1B,en-UK,en-IN,test,123,0.6454165954641994,0.6263794406651549,0.6767857142857143,0.35036496350364965,0.9404682274247491,816
|
| 44 |
+
Llama-3.2-1B,en-UK,en-UK,val,2024,0.7109697933227346,0.7004830917874396,0.7231182795698925,0.47058823529411764,0.9513513513513514,101
|
| 45 |
+
Llama-3.2-1B,en-UK,en-UK,test,2024,0.7567902353545425,0.7665130568356375,0.7479221953282202,0.5490196078431373,0.9645608628659477,700
|
| 46 |
+
Llama-3.2-1B,en-UK,en-AU,val,2024,0.4736842105263158,0.5029411764705882,0.5013326226012793,0.15789473684210525,0.7894736842105263,95
|
| 47 |
+
Llama-3.2-1B,en-UK,en-AU,test,2024,0.529786658311496,0.6059849745373872,0.5455392348022011,0.24427480916030533,0.8152985074626866,667
|
| 48 |
+
Llama-3.2-1B,en-UK,en-IN,val,2024,0.5703794369645043,0.5626072041166381,0.5837155963302753,0.21052631578947367,0.9302325581395349,117
|
| 49 |
+
Llama-3.2-1B,en-UK,en-IN,test,2024,0.5842140732096188,0.5706670100437805,0.6289473684210527,0.25301204819277107,0.9154160982264665,816
|
| 50 |
+
Llama-3.2-1B,en-UK,en-UK,val,42,0.6759358288770054,0.6876899696048632,0.665994623655914,0.4,0.9518716577540107,101
|
| 51 |
+
Llama-3.2-1B,en-UK,en-UK,test,42,0.7150127226463104,0.7596510537687008,0.6857484471144032,0.4666666666666667,0.9633587786259542,700
|
| 52 |
+
Llama-3.2-1B,en-UK,en-AU,val,42,0.48593073593073594,0.5438218390804598,0.5162579957356077,0.16666666666666666,0.8051948051948052,95
|
| 53 |
+
Llama-3.2-1B,en-UK,en-AU,test,42,0.5443177092615695,0.6743589743589744,0.5604012305559166,0.25806451612903225,0.8305709023941068,667
|
| 54 |
+
Llama-3.2-1B,en-UK,en-IN,val,42,0.6849449204406365,0.66295025728988,0.7178899082568808,0.42105263157894735,0.9488372093023256,117
|
| 55 |
+
Llama-3.2-1B,en-UK,en-IN,test,42,0.65115405376228,0.6246832375545247,0.7115601503759399,0.36942675159235666,0.9328813559322033,816
|
| 56 |
+
OLMo-2-0425-1B,en-AU,en-UK,val,123,0.6379928315412187,0.6176142697881828,0.7836021505376345,0.3870967741935484,0.8888888888888888,101
|
| 57 |
+
OLMo-2-0425-1B,en-AU,en-UK,test,123,0.5825799253645212,0.5948538011695906,0.7956315068093669,0.33093525179856115,0.8342245989304813,700
|
| 58 |
+
OLMo-2-0425-1B,en-AU,en-AU,val,123,0.7418812265493595,0.7361391129032258,0.7497334754797442,0.6440677966101694,0.8396946564885496,95
|
| 59 |
+
OLMo-2-0425-1B,en-AU,en-AU,test,123,0.7490441326011796,0.7400786730225168,0.7684150093158283,0.6636363636363637,0.8344519015659956,667
|
| 60 |
+
OLMo-2-0425-1B,en-AU,en-IN,val,123,0.4615384615384615,0.4973743435858965,0.4919724770642202,0.10256410256410256,0.8205128205128205,117
|
| 61 |
+
OLMo-2-0425-1B,en-AU,en-IN,test,123,0.5097588498349146,0.5345506623702112,0.6088345864661654,0.19014084507042253,0.8293768545994066,816
|
| 62 |
+
OLMo-2-0425-1B,en-AU,en-UK,val,2024,0.5533169533169533,0.5653735632183908,0.6834677419354839,0.2702702702702703,0.8363636363636363,101
|
| 63 |
+
OLMo-2-0425-1B,en-AU,en-UK,test,2024,0.5779906911617897,0.5901529392871651,0.7783091773351609,0.32116788321167883,0.8348134991119005,700
|
| 64 |
+
OLMo-2-0425-1B,en-AU,en-AU,val,2024,0.7766457680250785,0.7717948717948717,0.7825159914712154,0.6896551724137931,0.8636363636363636,95
|
| 65 |
+
OLMo-2-0425-1B,en-AU,en-AU,test,2024,0.7681856255235096,0.7584506775683246,0.7958425408379912,0.6945054945054945,0.8418657565415245,667
|
| 66 |
+
OLMo-2-0425-1B,en-AU,en-IN,val,2024,0.5778350515463917,0.5819852941176471,0.7557339449541285,0.3,0.8556701030927835,117
|
| 67 |
+
OLMo-2-0425-1B,en-AU,en-IN,test,2024,0.5172944480781809,0.5484228002213614,0.6644736842105263,0.22151898734177214,0.8130699088145896,816
|
| 68 |
+
OLMo-2-0425-1B,en-AU,en-UK,val,42,0.6194922194922194,0.6137452107279694,0.8192204301075269,0.3783783783783784,0.8606060606060606,101
|
| 69 |
+
OLMo-2-0425-1B,en-AU,en-UK,test,42,0.5902088400937917,0.6036187227174351,0.8295033682307311,0.34843205574912894,0.8319856244384546,700
|
| 70 |
+
OLMo-2-0425-1B,en-AU,en-AU,val,42,0.7127016129032258,0.7056412729026036,0.7273454157782516,0.6129032258064516,0.8125,95
|
| 71 |
+
OLMo-2-0425-1B,en-AU,en-AU,test,42,0.7511441647597255,0.7426921197082488,0.7796969106113784,0.6739130434782609,0.8283752860411899,667
|
| 72 |
+
OLMo-2-0425-1B,en-AU,en-IN,val,42,0.5538461538461539,0.5632033008252063,0.6932339449541285,0.2564102564102564,0.8512820512820513,117
|
| 73 |
+
OLMo-2-0425-1B,en-AU,en-IN,test,42,0.5040139219914501,0.5337434602200974,0.6098684210526315,0.18855218855218855,0.8194756554307117,816
|
| 74 |
+
OLMo-2-0425-1B,en-IN,en-UK,val,123,0.6162613981762918,0.6350877192982456,0.603494623655914,0.2857142857142857,0.9468085106382979,101
|
| 75 |
+
OLMo-2-0425-1B,en-IN,en-UK,test,123,0.6407206824537184,0.6828858298123666,0.6173923186842029,0.32558139534883723,0.9558599695585996,700
|
| 76 |
+
OLMo-2-0425-1B,en-IN,en-AU,val,123,0.5415942868172168,0.7666666666666666,0.5639658848614072,0.24242424242424243,0.8407643312101911,95
|
| 77 |
+
OLMo-2-0425-1B,en-IN,en-AU,test,123,0.5170503165353416,0.670749632533072,0.5453009229169374,0.20425531914893616,0.8298453139217471,667
|
| 78 |
+
OLMo-2-0425-1B,en-IN,en-IN,val,123,0.6645642201834863,0.6645642201834863,0.6645642201834863,0.375,0.9541284403669725,117
|
| 79 |
+
OLMo-2-0425-1B,en-IN,en-IN,test,123,0.6033829104695246,0.6071284736206202,0.6,0.25925925925925924,0.94750656167979,816
|
| 80 |
+
OLMo-2-0425-1B,en-IN,en-UK,val,2024,0.6223756111590452,0.6085858585858586,0.644489247311828,0.3157894736842105,0.9289617486338798,101
|
| 81 |
+
OLMo-2-0425-1B,en-IN,en-UK,test,2024,0.6770926675114927,0.6458234456573327,0.7508821556676679,0.423841059602649,0.9303442754203363,700
|
| 82 |
+
OLMo-2-0425-1B,en-IN,en-AU,val,2024,0.5859277708592777,0.6234177215189873,0.5831556503198294,0.36363636363636365,0.8082191780821918,95
|
| 83 |
+
OLMo-2-0425-1B,en-IN,en-AU,test,2024,0.6268151878131278,0.6778602905173821,0.6180456258936695,0.42483660130718953,0.8287937743190662,667
|
| 84 |
+
OLMo-2-0425-1B,en-IN,en-IN,val,2024,0.6641148325358852,0.6320588235294118,0.757454128440367,0.4,0.9282296650717703,117
|
| 85 |
+
OLMo-2-0425-1B,en-IN,en-IN,test,2024,0.5982564213408263,0.5815232897103516,0.6544172932330827,0.2807017543859649,0.9158110882956879,816
|
| 86 |
+
OLMo-2-0425-1B,en-IN,en-UK,val,42,0.6950483091787439,0.6780219780219781,0.717741935483871,0.4444444444444444,0.9456521739130435,101
|
| 87 |
+
OLMo-2-0425-1B,en-IN,en-UK,test,42,0.712665794402114,0.6911274948896391,0.7426730045784609,0.47540983606557374,0.9499217527386542,700
|
| 88 |
+
OLMo-2-0425-1B,en-IN,en-AU,val,42,0.43538801154695195,0.43614718614718617,0.46855010660980806,0.10256410256410256,0.7682119205298014,95
|
| 89 |
+
OLMo-2-0425-1B,en-IN,en-AU,test,42,0.5582766756852996,0.6098137973137974,0.5623510550717102,0.30662020905923343,0.8099331423113658,667
|
| 90 |
+
OLMo-2-0425-1B,en-IN,en-IN,val,42,0.7160194174757282,0.6698453608247423,0.8778669724770642,0.5,0.9320388349514563,117
|
| 91 |
+
OLMo-2-0425-1B,en-IN,en-IN,test,42,0.6033979967034361,0.5863459689890083,0.694172932330827,0.30303030303030304,0.9037656903765691,816
|
| 92 |
+
OLMo-2-0425-1B,en-UK,en-UK,val,123,0.7479500891265597,0.7644376899696048,0.7338709677419355,0.5333333333333333,0.9625668449197861,101
|
| 93 |
+
OLMo-2-0425-1B,en-UK,en-UK,test,123,0.7120526532291238,0.7521466051836541,0.6849756495873554,0.46153846153846156,0.9625668449197861,700
|
| 94 |
+
OLMo-2-0425-1B,en-UK,en-AU,val,123,0.46824937270797146,0.5555555555555556,0.5133262260127932,0.12121212121212122,0.8152866242038217,95
|
| 95 |
+
OLMo-2-0425-1B,en-UK,en-AU,test,123,0.533385153517295,0.6478397148300657,0.552114476363794,0.24096385542168675,0.8258064516129032,667
|
| 96 |
+
OLMo-2-0425-1B,en-UK,en-IN,val,123,0.5441558441558442,0.5518018018018018,0.5395642201834863,0.14285714285714285,0.9454545454545454,117
|
| 97 |
+
OLMo-2-0425-1B,en-UK,en-IN,test,123,0.572556602492385,0.5812797233030695,0.5662593984962406,0.19801980198019803,0.9470934030045721,816
|
| 98 |
+
OLMo-2-0425-1B,en-UK,en-UK,val,2024,0.6606182795698925,0.6606182795698925,0.6606182795698925,0.375,0.946236559139785,101
|
| 99 |
+
OLMo-2-0425-1B,en-UK,en-UK,test,2024,0.6941508104298801,0.68797577146599,0.7009127759470415,0.43636363636363634,0.951937984496124,700
|
| 100 |
+
OLMo-2-0425-1B,en-UK,en-AU,val,2024,0.506578947368421,0.5588235294117647,0.5266524520255863,0.21052631578947367,0.8026315789473685,95
|
| 101 |
+
OLMo-2-0425-1B,en-UK,en-AU,test,2024,0.5603284276390165,0.6278599418531159,0.5657415832575068,0.30324909747292417,0.8174077578051088,667
|
| 102 |
+
OLMo-2-0425-1B,en-UK,en-IN,val,2024,0.7122736418511066,0.6778846153846154,0.775802752293578,0.47619047619047616,0.9483568075117371,117
|
| 103 |
+
OLMo-2-0425-1B,en-UK,en-IN,test,2024,0.5899861082375196,0.5753867791842475,0.6322368421052631,0.2608695652173913,0.9191026512576479,816
|
| 104 |
+
OLMo-2-0425-1B,en-UK,en-UK,val,42,0.760852407261247,0.7194055944055944,0.8373655913978495,0.5714285714285714,0.9502762430939227,101
|
| 105 |
+
OLMo-2-0425-1B,en-UK,en-UK,test,42,0.7341966944461098,0.7162932389937107,0.7567437520048992,0.5128205128205128,0.9555728760717069,700
|
| 106 |
+
OLMo-2-0425-1B,en-UK,en-AU,val,42,0.4933333333333333,0.5220883534136546,0.511727078891258,0.2,0.7866666666666666,95
|
| 107 |
+
OLMo-2-0425-1B,en-UK,en-AU,test,42,0.5864876280793547,0.6661872003335418,0.5863555613328134,0.34532374100719426,0.8276515151515151,667
|
| 108 |
+
OLMo-2-0425-1B,en-UK,en-IN,val,42,0.6511249661154784,0.6435185185185185,0.6599770642201834,0.35294117647058826,0.9493087557603687,117
|
| 109 |
+
OLMo-2-0425-1B,en-UK,en-IN,test,42,0.5850752422180994,0.5726942023493747,0.612687969924812,0.24489795918367346,0.9252525252525252,816
|
| 110 |
+
Qwen2.5-1.5B,en-AU,en-UK,val,123,0.5943775100401606,0.5934442270058709,0.7567204301075269,0.3333333333333333,0.8554216867469879,101
|
| 111 |
+
Qwen2.5-1.5B,en-AU,en-UK,test,123,0.595043780375802,0.6015740987475732,0.8112478492899011,0.34686346863468637,0.8432240921169176,700
|
| 112 |
+
Qwen2.5-1.5B,en-AU,en-AU,val,123,0.7223281230143601,0.7152981427174976,0.7348081023454158,0.6229508196721312,0.8217054263565892,95
|
| 113 |
+
Qwen2.5-1.5B,en-AU,en-AU,test,123,0.7083176400702721,0.701322034549682,0.7269812383552148,0.6112359550561798,0.8053993250843644,667
|
| 114 |
+
Qwen2.5-1.5B,en-AU,en-IN,val,123,0.540009250693802,0.5662891405729513,0.7282110091743119,0.2608695652173913,0.8191489361702128,117
|
| 115 |
+
Qwen2.5-1.5B,en-AU,en-IN,test,123,0.49798092406788064,0.5584888314656993,0.7212406015037593,0.2358974358974359,0.7600644122383253,816
|
| 116 |
+
Qwen2.5-1.5B,en-AU,en-UK,val,2024,0.6345760578889413,0.6290322580645161,0.8763440860215054,0.41025641025641024,0.8588957055214724,101
|
| 117 |
+
Qwen2.5-1.5B,en-AU,en-UK,test,2024,0.5980362461507396,0.6082574142275635,0.8420285206030738,0.3591549295774648,0.8369175627240143,700
|
| 118 |
+
Qwen2.5-1.5B,en-AU,en-AU,val,2024,0.7366431451612903,0.7285438765670202,0.7526652452025586,0.6451612903225806,0.828125,95
|
| 119 |
+
Qwen2.5-1.5B,en-AU,en-AU,test,2024,0.7144178175028438,0.7070898399845769,0.7327169721391742,0.618510158013544,0.8103254769921436,667
|
| 120 |
+
Qwen2.5-1.5B,en-AU,en-IN,val,2024,0.5583830512601973,0.5735191637630662,0.7419724770642202,0.27906976744186046,0.837696335078534,117
|
| 121 |
+
Qwen2.5-1.5B,en-AU,en-IN,test,2024,0.49029982363315694,0.5347547974413646,0.6225563909774436,0.19047619047619047,0.7901234567901234,816
|
| 122 |
+
Qwen2.5-1.5B,en-AU,en-UK,val,42,0.619688004303389,0.6068421052631578,0.7728494623655914,0.36363636363636365,0.8757396449704142,101
|
| 123 |
+
Qwen2.5-1.5B,en-AU,en-UK,test,42,0.580303264737738,0.5925111167517463,0.7869703420722638,0.32608695652173914,0.8345195729537367,700
|
| 124 |
+
Qwen2.5-1.5B,en-AU,en-AU,val,42,0.7076923076923077,0.701304753028891,0.7302771855010661,0.6153846153846154,0.8,95
|
| 125 |
+
Qwen2.5-1.5B,en-AU,en-AU,test,42,0.6914573880512127,0.6853010126630925,0.7089182806880714,0.5887640449438202,0.7941507311586051,667
|
| 126 |
+
Qwen2.5-1.5B,en-AU,en-IN,val,42,0.5361620057859209,0.5746385920804525,0.7723623853211009,0.27450980392156865,0.7978142076502732,117
|
| 127 |
+
Qwen2.5-1.5B,en-AU,en-IN,test,42,0.4725362150694467,0.5386338743403366,0.6458646616541354,0.1958762886597938,0.7491961414790996,816
|
| 128 |
+
Qwen2.5-1.5B,en-IN,en-UK,val,123,0.5478225478225478,0.5635416666666667,0.540994623655914,0.15384615384615385,0.9417989417989417,101
|
| 129 |
+
Qwen2.5-1.5B,en-IN,en-UK,test,123,0.5428315813017222,0.5585703592814371,0.5365110378816599,0.1411764705882353,0.9444866920152091,700
|
| 130 |
+
Qwen2.5-1.5B,en-IN,en-AU,val,123,0.40625,0.34946236559139787,0.48507462686567165,0.0,0.8125,95
|
| 131 |
+
Qwen2.5-1.5B,en-IN,en-AU,test,123,0.4851058032876215,0.5640096618357487,0.5198069673729364,0.15702479338842976,0.8131868131868132,667
|
| 132 |
+
Qwen2.5-1.5B,en-IN,en-IN,val,123,0.6645642201834863,0.6645642201834863,0.6645642201834863,0.375,0.9541284403669725,117
|
| 133 |
+
Qwen2.5-1.5B,en-IN,en-IN,test,123,0.633319142897687,0.6084940022339858,0.7099624060150376,0.3448275862068966,0.9218106995884774,816
|
| 134 |
+
Qwen2.5-1.5B,en-IN,en-UK,val,2024,0.5248655913978495,0.5248655913978495,0.5248655913978495,0.125,0.9247311827956989,101
|
| 135 |
+
Qwen2.5-1.5B,en-IN,en-UK,test,2024,0.6819938635299381,0.6749113760876571,0.6899332186287948,0.4144144144144144,0.9495733126454616,700
|
| 136 |
+
Qwen2.5-1.5B,en-IN,en-AU,val,2024,0.45165945165945165,0.4755747126436782,0.4909381663113006,0.1111111111111111,0.7922077922077922,95
|
| 137 |
+
Qwen2.5-1.5B,en-IN,en-AU,test,2024,0.5098556237572404,0.5780725139962677,0.5317225616361194,0.20930232558139536,0.8104089219330854,667
|
| 138 |
+
Qwen2.5-1.5B,en-IN,en-IN,val,2024,0.7993138936535162,0.7451456310679612,0.9053899082568808,0.6363636363636364,0.9622641509433962,117
|
| 139 |
+
Qwen2.5-1.5B,en-IN,en-IN,test,2024,0.6018735362997658,0.5844155844155844,0.656390977443609,0.2857142857142857,0.9180327868852459,816
|
| 140 |
+
Qwen2.5-1.5B,en-IN,en-UK,val,42,0.5319073083778967,0.5341945288753799,0.530241935483871,0.13333333333333333,0.93048128342246,101
|
| 141 |
+
Qwen2.5-1.5B,en-IN,en-UK,test,42,0.5917296083520457,0.5917296083520457,0.5917296083520457,0.24528301886792453,0.9381761978361669,700
|
| 142 |
+
Qwen2.5-1.5B,en-IN,en-AU,val,42,0.499915095941586,0.538961038961039,0.5191897654584222,0.20512820512820512,0.7947019867549668,95
|
| 143 |
+
Qwen2.5-1.5B,en-IN,en-AU,test,42,0.5069485511531637,0.5592964824120603,0.5268425841674249,0.21052631578947367,0.8033707865168539,667
|
| 144 |
+
Qwen2.5-1.5B,en-IN,en-IN,val,42,0.6989708404802745,0.6640083217753121,0.7712155963302753,0.45454545454545453,0.9433962264150944,117
|
| 145 |
+
Qwen2.5-1.5B,en-IN,en-IN,test,42,0.5883122051048832,0.5749788672865596,0.6667293233082707,0.2755102040816326,0.9011142061281338,816
|
| 146 |
+
Qwen2.5-1.5B,en-UK,en-UK,val,123,0.6670329670329671,0.6441947565543071,0.706989247311828,0.4,0.9340659340659341,101
|
| 147 |
+
Qwen2.5-1.5B,en-UK,en-UK,test,123,0.6367753195905468,0.6133333333333333,0.6983027616575778,0.35294117647058826,0.9206094627105053,700
|
| 148 |
+
Qwen2.5-1.5B,en-UK,en-AU,val,123,0.5486590038314176,0.5712669683257918,0.5503731343283582,0.3111111111111111,0.7862068965517242,95
|
| 149 |
+
Qwen2.5-1.5B,en-UK,en-AU,test,123,0.6104072853875038,0.6208834744713514,0.6057671476233806,0.428169014084507,0.7926455566905005,667
|
| 150 |
+
Qwen2.5-1.5B,en-UK,en-IN,val,123,0.6379950495049505,0.614247311827957,0.7924311926605505,0.375,0.900990099009901,117
|
| 151 |
+
Qwen2.5-1.5B,en-UK,en-IN,test,123,0.5751045943810473,0.5819624819624819,0.7602443609022557,0.29965156794425085,0.8505576208178439,816
|
| 152 |
+
Qwen2.5-1.5B,en-UK,en-UK,val,2024,0.7635767790262172,0.7128676470588236,0.8891129032258065,0.5833333333333334,0.9438202247191011,101
|
| 153 |
+
Qwen2.5-1.5B,en-UK,en-UK,test,2024,0.6864827073911428,0.654744822943792,0.7547461433029075,0.4383561643835616,0.9346092503987241,700
|
| 154 |
+
Qwen2.5-1.5B,en-UK,en-AU,val,2024,0.5052819520904627,0.513157894736842,0.5101279317697228,0.2553191489361702,0.7552447552447552,95
|
| 155 |
+
Qwen2.5-1.5B,en-UK,en-AU,test,2024,0.5886485592940159,0.6288722049942875,0.5855323020928116,0.3660130718954248,0.811284046692607,667
|
| 156 |
+
Qwen2.5-1.5B,en-UK,en-IN,val,2024,0.6383680197815784,0.6137254901960785,0.6995412844036697,0.34782608695652173,0.9289099526066351,117
|
| 157 |
+
Qwen2.5-1.5B,en-UK,en-IN,test,2024,0.5687712330340877,0.5628356451566583,0.6542293233082707,0.25116279069767444,0.8863796753705011,816
|
| 158 |
+
Qwen2.5-1.5B,en-UK,en-UK,val,42,0.7449494949494949,0.7027914614121511,0.831989247311828,0.5454545454545454,0.9444444444444444,101
|
| 159 |
+
Qwen2.5-1.5B,en-UK,en-UK,test,42,0.6253723008190618,0.6074085645662065,0.660406520661398,0.3235294117647059,0.9272151898734177,700
|
| 160 |
+
Qwen2.5-1.5B,en-UK,en-AU,val,42,0.5737443913735707,0.5859073359073359,0.5711620469083156,0.3673469387755102,0.7801418439716312,95
|
| 161 |
+
Qwen2.5-1.5B,en-UK,en-AU,test,42,0.5669408991861964,0.5800192174237976,0.5649508210927683,0.35398230088495575,0.7798994974874371,667
|
| 162 |
+
Qwen2.5-1.5B,en-UK,en-IN,val,42,0.6558823529411765,0.6258373205741627,0.801605504587156,0.4,0.9117647058823529,117
|
| 163 |
+
Qwen2.5-1.5B,en-UK,en-IN,test,42,0.5435938330675173,0.553960020242915,0.6603383458646617,0.23484848484848486,0.8523391812865497,816
|
| 164 |
+
SmolLM2-1.7B,en-AU,en-UK,val,123,0.637125748502994,0.6228728728728729,0.8299731182795699,0.4,0.874251497005988,101
|
| 165 |
+
SmolLM2-1.7B,en-AU,en-UK,test,123,0.5911773842808326,0.6054351094950379,0.8373917354407863,0.35172413793103446,0.8306306306306306,700
|
| 166 |
+
SmolLM2-1.7B,en-AU,en-AU,val,123,0.7366431451612903,0.7285438765670202,0.7526652452025586,0.6451612903225806,0.828125,95
|
| 167 |
+
SmolLM2-1.7B,en-AU,en-AU,test,123,0.7433515103241223,0.7372658241880058,0.778223709866112,0.6694736842105263,0.8172293364377182,667
|
| 168 |
+
SmolLM2-1.7B,en-AU,en-IN,val,123,0.5583830512601973,0.5735191637630662,0.7419724770642202,0.27906976744186046,0.837696335078534,117
|
| 169 |
+
SmolLM2-1.7B,en-AU,en-IN,test,123,0.49055935206276896,0.5289194041438207,0.5980263157894736,0.17777777777777778,0.8033409263477601,816
|
| 170 |
+
SmolLM2-1.7B,en-AU,en-UK,val,2024,0.6110397946084724,0.60962441314554,0.8138440860215054,0.3684210526315789,0.8536585365853658,101
|
| 171 |
+
SmolLM2-1.7B,en-AU,en-UK,test,2024,0.5982142857142857,0.6069609112330145,0.8349129509200666,0.35714285714285715,0.8392857142857143,700
|
| 172 |
+
SmolLM2-1.7B,en-AU,en-AU,val,2024,0.7506561679790026,0.7416666666666667,0.7705223880597014,0.6666666666666666,0.8346456692913385,95
|
| 173 |
+
SmolLM2-1.7B,en-AU,en-AU,test,2024,0.727095540737912,0.7202762172284645,0.7548366480350102,0.6436285097192225,0.8105625717566016,667
|
| 174 |
+
SmolLM2-1.7B,en-AU,en-IN,val,2024,0.5421739130434783,0.5766666666666667,0.7769495412844036,0.28,0.8043478260869565,117
|
| 175 |
+
SmolLM2-1.7B,en-AU,en-IN,test,2024,0.4959935897435897,0.5547780887644942,0.7060150375939849,0.22916666666666666,0.7628205128205128,816
|
| 176 |
+
SmolLM2-1.7B,en-AU,en-UK,val,42,0.6025662959794696,0.5975975975975976,0.7620967741935484,0.34285714285714286,0.8622754491017964,101
|
| 177 |
+
SmolLM2-1.7B,en-AU,en-UK,test,42,0.5913422304804711,0.6040841459043662,0.8302761657577791,0.34965034965034963,0.8330341113105925,700
|
| 178 |
+
SmolLM2-1.7B,en-AU,en-AU,val,42,0.7031621047369079,0.6964285714285714,0.7198827292110874,0.6031746031746031,0.8031496062992126,95
|
| 179 |
+
SmolLM2-1.7B,en-AU,en-AU,test,42,0.7358077904313447,0.7295222634508349,0.768019628233459,0.6581740976645435,0.813441483198146,667
|
| 180 |
+
SmolLM2-1.7B,en-AU,en-IN,val,42,0.5345040471944025,0.5462382445141066,0.6353211009174312,0.21621621621621623,0.8527918781725888,117
|
| 181 |
+
SmolLM2-1.7B,en-AU,en-IN,test,42,0.5029044648979041,0.5319188474235369,0.6029135338345865,0.18430034129692832,0.8215085884988798,816
|
| 182 |
+
SmolLM2-1.7B,en-IN,en-UK,val,123,0.63003663003663,0.66875,0.6088709677419355,0.3076923076923077,0.9523809523809523,101
|
| 183 |
+
SmolLM2-1.7B,en-IN,en-UK,test,123,0.653310327198364,0.6702771583306786,0.6402846227873202,0.3541666666666667,0.9524539877300614,700
|
| 184 |
+
SmolLM2-1.7B,en-IN,en-AU,val,123,0.4796104686548996,0.6920289855072463,0.5282515991471215,0.12903225806451613,0.8301886792452831,95
|
| 185 |
+
SmolLM2-1.7B,en-IN,en-AU,test,123,0.4894668620595487,0.5903335151562379,0.5251148229992634,0.16033755274261605,0.8185961713764813,667
|
| 186 |
+
SmolLM2-1.7B,en-IN,en-IN,val,123,0.675,0.6413985148514851,0.7620412844036697,0.4166666666666667,0.9333333333333333,117
|
| 187 |
+
SmolLM2-1.7B,en-IN,en-IN,test,123,0.5964276225705586,0.5801993482844547,0.6474624060150376,0.2754491017964072,0.9174061433447099,816
|
| 188 |
+
SmolLM2-1.7B,en-IN,en-UK,val,2024,0.6606182795698925,0.6606182795698925,0.6606182795698925,0.375,0.946236559139785,101
|
| 189 |
+
SmolLM2-1.7B,en-IN,en-UK,test,2024,0.644235294117647,0.6280962491153574,0.6689072934589251,0.352,0.9364705882352942,700
|
| 190 |
+
SmolLM2-1.7B,en-IN,en-AU,val,2024,0.5049218297625941,0.6611111111111111,0.5386460554371002,0.18181818181818182,0.8280254777070064,95
|
| 191 |
+
SmolLM2-1.7B,en-IN,en-AU,test,2024,0.5506536118819485,0.6272773396061067,0.5597837861259154,0.2825278810408922,0.8187793427230047,667
|
| 192 |
+
SmolLM2-1.7B,en-IN,en-IN,val,2024,0.6971153846153846,0.6565656565656566,0.819954128440367,0.46153846153846156,0.9326923076923077,117
|
| 193 |
+
SmolLM2-1.7B,en-IN,en-IN,test,2024,0.6111058157109982,0.5927327327327327,0.7176691729323308,0.32038834951456313,0.9018232819074333,816
|
| 194 |
+
SmolLM2-1.7B,en-IN,en-UK,val,42,0.6576271186440679,0.6292016806722689,0.747983870967742,0.4,0.9152542372881356,101
|
| 195 |
+
SmolLM2-1.7B,en-IN,en-UK,test,42,0.6403960957290393,0.6157059314954052,0.7544545215945875,0.3763440860215054,0.9044481054365733,700
|
| 196 |
+
SmolLM2-1.7B,en-IN,en-AU,val,42,0.5763656633221851,0.5815727699530516,0.57409381663113,0.38461538461538464,0.7681159420289855,95
|
| 197 |
+
SmolLM2-1.7B,en-IN,en-AU,test,42,0.6537011345549872,0.6634610778443114,0.6478508600892586,0.49586776859504134,0.811534500514933,667
|
| 198 |
+
SmolLM2-1.7B,en-IN,en-IN,val,42,0.6285714285714286,0.6051980198019802,0.694954128440367,0.3333333333333333,0.9238095238095239,117
|
| 199 |
+
SmolLM2-1.7B,en-IN,en-IN,test,42,0.5883122051048832,0.5749788672865596,0.6667293233082707,0.2755102040816326,0.9011142061281338,816
|
| 200 |
+
SmolLM2-1.7B,en-UK,en-UK,val,123,0.6670329670329671,0.6441947565543071,0.706989247311828,0.4,0.9340659340659341,101
|
| 201 |
+
SmolLM2-1.7B,en-UK,en-UK,test,123,0.6974105937921727,0.661792635170339,0.7807296375142165,0.4605263157894737,0.9342948717948718,700
|
| 202 |
+
SmolLM2-1.7B,en-UK,en-AU,val,123,0.5321786381389031,0.5903679653679653,0.5445095948827292,0.2564102564102564,0.8079470198675497,95
|
| 203 |
+
SmolLM2-1.7B,en-UK,en-AU,test,123,0.6433687299495163,0.686654478976234,0.6327180553750162,0.45569620253164556,0.831041257367387,667
|
| 204 |
+
SmolLM2-1.7B,en-UK,en-IN,val,123,0.6865856171440347,0.6519607843137255,0.7666284403669725,0.43478260869565216,0.9383886255924171,117
|
| 205 |
+
SmolLM2-1.7B,en-UK,en-IN,test,123,0.5551364413272823,0.5494417862838915,0.6048872180451128,0.2127659574468085,0.8975069252077562,816
|
| 206 |
+
SmolLM2-1.7B,en-UK,en-UK,val,2024,0.7225274725274725,0.6914794007490637,0.7748655913978495,0.5,0.945054945054945,101
|
| 207 |
+
SmolLM2-1.7B,en-UK,en-UK,test,2024,0.7073689811119979,0.6713187276174426,0.7845936251494561,0.47619047619047616,0.9385474860335196,700
|
| 208 |
+
SmolLM2-1.7B,en-UK,en-AU,val,2024,0.5490260271730234,0.5966228893058161,0.5549040511727079,0.2926829268292683,0.8053691275167785,95
|
| 209 |
+
SmolLM2-1.7B,en-UK,en-AU,test,2024,0.6159324540561509,0.6426720475785896,0.6091414272715456,0.42073170731707316,0.8111332007952287,667
|
| 210 |
+
SmolLM2-1.7B,en-UK,en-IN,val,2024,0.704793944491169,0.6614583333333334,0.8732798165137614,0.4827586206896552,0.926829268292683,117
|
| 211 |
+
SmolLM2-1.7B,en-UK,en-IN,test,2024,0.5663695998745525,0.5670331589584079,0.6893796992481203,0.26506024096385544,0.8676789587852495,816
|
| 212 |
+
SmolLM2-1.7B,en-UK,en-UK,val,42,0.6690074906367042,0.6386029411764707,0.7533602150537635,0.4166666666666667,0.9213483146067416,101
|
| 213 |
+
SmolLM2-1.7B,en-UK,en-UK,test,42,0.6897959183673469,0.6526887514890237,0.8139890933481089,0.45714285714285713,0.9224489795918367,700
|
| 214 |
+
SmolLM2-1.7B,en-UK,en-AU,val,42,0.5539906103286385,0.5666666666666667,0.5533049040511727,0.3333333333333333,0.7746478873239436,95
|
| 215 |
+
SmolLM2-1.7B,en-UK,en-AU,test,42,0.6746341463414633,0.6931153641679957,0.6652638762511374,0.52,0.8292682926829268,667
|
| 216 |
+
SmolLM2-1.7B,en-UK,en-IN,val,42,0.67196261682243,0.6476190476190476,0.713302752293578,0.4,0.9439252336448598,117
|
| 217 |
+
SmolLM2-1.7B,en-UK,en-IN,test,42,0.5524905538162582,0.5455860970712456,0.5773496240601503,0.1910828025477707,0.9138983050847458,816
|
eval_outputs/best_adapter_per_variety.json
ADDED
|
@@ -0,0 +1,20 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"en-UK": {
|
| 3 |
+
"base_short": "Qwen2.5-1.5B",
|
| 4 |
+
"train_var": "en-UK",
|
| 5 |
+
"selected_on_val_mean_macro_f1": 0.7252,
|
| 6 |
+
"reported_test_mean_macro_f1": 0.6495
|
| 7 |
+
},
|
| 8 |
+
"en-AU": {
|
| 9 |
+
"base_short": "OLMo-2-0425-1B",
|
| 10 |
+
"train_var": "en-AU",
|
| 11 |
+
"selected_on_val_mean_macro_f1": 0.7437,
|
| 12 |
+
"reported_test_mean_macro_f1": 0.7561
|
| 13 |
+
},
|
| 14 |
+
"en-IN": {
|
| 15 |
+
"base_short": "Qwen2.5-1.5B",
|
| 16 |
+
"train_var": "en-IN",
|
| 17 |
+
"selected_on_val_mean_macro_f1": 0.7209,
|
| 18 |
+
"reported_test_mean_macro_f1": 0.6078
|
| 19 |
+
}
|
| 20 |
+
}
|
eval_outputs/confusion_matrices/best_on_en-AU.png
ADDED
|
eval_outputs/confusion_matrices/best_on_en-IN.png
ADDED
|
eval_outputs/confusion_matrices/best_on_en-UK.png
ADDED
|
eval_outputs/errors_for_q4/README.md
ADDED
|
@@ -0,0 +1,16 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Q4 errors hand-off (for Shoeb)
|
| 2 |
+
|
| 3 |
+
Source adapter: `HuggingFaceTB/SmolLM2-1.7B` + LoRA, trained on `en-UK` with seed 2024. It was selected as the overall best adapter by its validation Macro-F1 averaged over the 3 varieties (en-UK, en-AU, en-IN). The reported numbers and the misclassified examples below come from the held-out test splits for that val-selected adapter.
|
| 4 |
+
|
| 5 |
+
The CSV `SmolLM2-1.7B_en-UK_seed2024_misclassified.csv` has 40 rows sampled (`random_state=42`) from all test-set misclassifications across en-UK, en-AU, and en-IN combined.
|
| 6 |
+
|
| 7 |
+
Columns:
|
| 8 |
+
- `text` -- the original review or comment
|
| 9 |
+
- `true_label` -- ground-truth Sarcasm label (0 = not_sarcastic, 1 = sarcastic)
|
| 10 |
+
- `predicted_label` -- model's prediction (same 0/1 encoding)
|
| 11 |
+
- `variety` -- which test set the example came from (en-UK / en-AU / en-IN)
|
| 12 |
+
- `base_model` -- Hugging Face ID of the base model used
|
| 13 |
+
- `training_variety` -- variety the adapter was trained on (en-UK in this case)
|
| 14 |
+
- `seed` -- random seed used during the adapter's training
|
| 15 |
+
|
| 16 |
+
For Q4: choose 10 examples from the CSV, use any 4 of them to write a Few-Shot Prompt with your own explanations, test the remaining 6 against that prompt per the spec, and report the before/after analysis.
|
eval_outputs/errors_for_q4/SmolLM2-1.7B_en-UK_seed2024_misclassified.csv
ADDED
|
@@ -0,0 +1,48 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
text,true_label,predicted_label,variety,base_model,training_variety,seed
|
| 2 |
+
That might cause a succesionary crisis.,0,1,en-IN,HuggingFaceTB/SmolLM2-1.7B,en-UK,2024
|
| 3 |
+
Kannadigas say they are proud Indians as well (outside of India).,1,0,en-IN,HuggingFaceTB/SmolLM2-1.7B,en-UK,2024
|
| 4 |
+
Australia doesn't really have crack.,0,1,en-AU,HuggingFaceTB/SmolLM2-1.7B,en-UK,2024
|
| 5 |
+
I love super nintendo chalmers!,0,1,en-UK,HuggingFaceTB/SmolLM2-1.7B,en-UK,2024
|
| 6 |
+
Iss clown ko andar daalo.,0,1,en-IN,HuggingFaceTB/SmolLM2-1.7B,en-UK,2024
|
| 7 |
+
Ordered a Szechuan I got was chicken & carrot dripping in a sweet sauce that tasted like Kantong sauce straight out of a jar. It was disgusting. A restaurant is only as good as their chef!!!,1,0,en-AU,HuggingFaceTB/SmolLM2-1.7B,en-UK,2024
|
| 8 |
+
Just a rivalry between 2 criminals or maybe a publicity stunt,0,1,en-IN,HuggingFaceTB/SmolLM2-1.7B,en-UK,2024
|
| 9 |
+
"Perfectly curated ""story"" to seem more ""relatable"" to the audience, like ""see, I'm a normal girl with flaws too"", except it just highlights her abnormal behaviour. That is how it comes across to me. Why people can't see beyond this act is surprising to me. No offence, but she really seems like someone with zero personality, and even this ""incident"" she's recalling seems fake.
|
| 10 |
+
Edit: As evident from the comments here, although her version of ""being attached"" seems exaggerated, she's struck a chord with the audience. But everything she does looks and sounds so fake I'll have trouble believing anything she says.",1,0,en-IN,HuggingFaceTB/SmolLM2-1.7B,en-UK,2024
|
| 11 |
+
"In a room full of idiots and wise men, the idiots scream the loudest.",0,1,en-IN,HuggingFaceTB/SmolLM2-1.7B,en-UK,2024
|
| 12 |
+
I can see people buying it for kinky reasons only,0,1,en-IN,HuggingFaceTB/SmolLM2-1.7B,en-UK,2024
|
| 13 |
+
Not sure if Clark rubber is your supplier of fish or your dishing up carp but seriously look at your supplier of fish it was terrible 2 stars for the chips and salt make sure your stem dimeys are ready before serving also!!!,1,0,en-AU,HuggingFaceTB/SmolLM2-1.7B,en-UK,2024
|
| 14 |
+
Why don't we just clear all the forests then? What was the point of learning all that environmental conservation in our schools?,0,1,en-IN,HuggingFaceTB/SmolLM2-1.7B,en-UK,2024
|
| 15 |
+
">We all be fucking
|
| 16 |
+
I see you have forgotten you are posting on reddit where everyone is a virgin.",0,1,en-UK,HuggingFaceTB/SmolLM2-1.7B,en-UK,2024
|
| 17 |
+
"Tesla drivers are not just that, but they are also pissed off idiots because the price has dropped by another 20%.",1,0,en-AU,HuggingFaceTB/SmolLM2-1.7B,en-UK,2024
|
| 18 |
+
"Rahul Gandhi come out of the bubble, resign and leave the country.",0,1,en-IN,HuggingFaceTB/SmolLM2-1.7B,en-UK,2024
|
| 19 |
+
"Pretty well the opposite, have to try and speak more clearly as my country accent is a bit strong. You do see some people absolutely hyping it up and it's cringey. It's even worse when some of them try and use the local language but just murder it and make it sound nothing like the local language - Sell a mat mal lamb, tear rim a kassy mate.",1,0,en-AU,HuggingFaceTB/SmolLM2-1.7B,en-UK,2024
|
| 20 |
+
We ordered 4 burgers. Each burger had about 3 strands of lettuce in them which was super disappointing. The chips were barely cooked and not even salted. Disappointing stop for us.,1,0,en-AU,HuggingFaceTB/SmolLM2-1.7B,en-UK,2024
|
| 21 |
+
More racist bollox from the tories,0,1,en-UK,HuggingFaceTB/SmolLM2-1.7B,en-UK,2024
|
| 22 |
+
"""Perfect is the enemy of good"" - Voltaire
|
| 23 |
+
Sure, Labour are tories in red ties. At least they aren't the bastard tories.
|
| 24 |
+
Any step to the left is a step in the right direction.",1,0,en-UK,HuggingFaceTB/SmolLM2-1.7B,en-UK,2024
|
| 25 |
+
"Oh honey he doesn't hate you, he just enjoys exploiting you for the benefit of an aging population that sees no concerns in destroying the livability of this this world in the next 20 years",1,0,en-UK,HuggingFaceTB/SmolLM2-1.7B,en-UK,2024
|
| 26 |
+
For some? He should have his passport taken.,0,1,en-UK,HuggingFaceTB/SmolLM2-1.7B,en-UK,2024
|
| 27 |
+
Yeah well out of this doctor and what he actually said once by the way and say an afl player. I don't think you have any clue about people and you throw extreme slurs around not because it is true but because they don't share your world view.,1,0,en-AU,HuggingFaceTB/SmolLM2-1.7B,en-UK,2024
|
| 28 |
+
Definitely not him,0,1,en-IN,HuggingFaceTB/SmolLM2-1.7B,en-UK,2024
|
| 29 |
+
"I know everyone is enjoying Ed Davey's meme stunts of the last few days, but now coupled with this as their first concrete policy, is anyone else noticing how absolutely detached from reality and tone deaf the Lib Dems are?
|
| 30 |
+
The country is broken. Please come up with some ideas that will help and maybe worry about easier access to PL games in the next parliament once ambulance handovers and cancer wait times have been fixed.",0,1,en-UK,HuggingFaceTB/SmolLM2-1.7B,en-UK,2024
|
| 31 |
+
"No this is why, we need to divorce infrastructure from politics and let engineers make decisions rather than politicians. Every project, the big decisions are made by people with little to no-understanding of what they are deciding upon.",1,0,en-AU,HuggingFaceTB/SmolLM2-1.7B,en-UK,2024
|
| 32 |
+
Dude. I'm very sorry your relative's vote didn't count. I hope we can do something after the elections about this.,0,1,en-IN,HuggingFaceTB/SmolLM2-1.7B,en-UK,2024
|
| 33 |
+
Why does he stay? Because the bitch will take half his money or more and the son.,0,1,en-IN,HuggingFaceTB/SmolLM2-1.7B,en-UK,2024
|
| 34 |
+
What happened to west Bengal?,0,1,en-IN,HuggingFaceTB/SmolLM2-1.7B,en-UK,2024
|
| 35 |
+
"Was really looking forward to this place, based on recommendations. However, the food was really underwhelming. I do n't think I ever had a "" bad "" Chinese / Korean meal( perhaps the luck ran out) this would have to be on the bottom of the list. Service, it was like being at an Aldi checkout. We actually had to ask for the food run to be slowed down-firstly, we did n't have space on the table and secondly, it felt like we were being rushed to eat. The pressure to chow down your meal was intense.",1,0,en-AU,HuggingFaceTB/SmolLM2-1.7B,en-UK,2024
|
| 36 |
+
"Pubs around my area banned those all-red shoes that eshays sometimes wear now. I think it's an alright policy I've never met anyone wearing them who wasn't a fuckwit.
|
| 37 |
+
If you like the pubs just try some other nikes? In my experience most of them are pretty comfy. Ignore all the sad cunts, idk why everyone is so pissed off about a reformed eshay who still likes his shoes",1,0,en-AU,HuggingFaceTB/SmolLM2-1.7B,en-UK,2024
|
| 38 |
+
"If you don't have much disposable income now, a car will send you broke. I understand wanting to be able to drive, but owning a car can be a huge drain on your finances. I hope you find what you're looking for :",0,1,en-AU,HuggingFaceTB/SmolLM2-1.7B,en-UK,2024
|
| 39 |
+
Yaar that is post pregnancy weight. She will look like her old self whenever she loses it.,0,1,en-IN,HuggingFaceTB/SmolLM2-1.7B,en-UK,2024
|
| 40 |
+
What a shithole we are.,0,1,en-IN,HuggingFaceTB/SmolLM2-1.7B,en-UK,2024
|
| 41 |
+
"smart move for Albo to stay out of it, trumps still the front runner and if you go back and look at polling, the mugshot release of trump led to a huge spike in his numbers and i remember seeing opinion polling showing about 64% of people thought the charges against him were politically motivated, im not sure if the positive effect on trumps numbers will have diminishing returns but either way the democrats really need to change up their strategy if they want to win in November",0,1,en-AU,HuggingFaceTB/SmolLM2-1.7B,en-UK,2024
|
| 42 |
+
That pro-Israel lawyer was really grasping at straws lol,0,1,en-UK,HuggingFaceTB/SmolLM2-1.7B,en-UK,2024
|
| 43 |
+
When did Religious laws rise above Government laws ?,0,1,en-IN,HuggingFaceTB/SmolLM2-1.7B,en-UK,2024
|
| 44 |
+
Get better LED Bulbs please,0,1,en-IN,HuggingFaceTB/SmolLM2-1.7B,en-UK,2024
|
| 45 |
+
"Made in India, Hacked in Pakistan",0,1,en-IN,HuggingFaceTB/SmolLM2-1.7B,en-UK,2024
|
| 46 |
+
"Anyone proposing to run as a candidate for election should have to show that they have completed a civics course. This course should be free of charge, taught at accessible times, as adult community education. The course should cover the history of democracy, the Australian system, comparison to other systems in the past and in other nations, the expectations of an elected official, basic ethical philosophy etc, all up around twenty two-hour classes with assignments in between.
|
| 47 |
+
No intelligent, honest, decent adult should have any difficulty with this type of course, however it would at least put those who are not, on notice.",1,0,en-AU,HuggingFaceTB/SmolLM2-1.7B,en-UK,2024
|
| 48 |
+
Whoever takes this guy seriously...,0,1,en-IN,HuggingFaceTB/SmolLM2-1.7B,en-UK,2024
|
eval_outputs/matrices/Llama-3.2-1B_mean.csv
ADDED
|
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
train_var,en-AU,en-IN,en-UK
|
| 2 |
+
en-AU,0.744,0.508,0.6
|
| 3 |
+
en-IN,0.513,0.627,0.645
|
| 4 |
+
en-UK,0.539,0.627,0.732
|
eval_outputs/matrices/Llama-3.2-1B_std.csv
ADDED
|
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
train_var,en-AU,en-IN,en-UK
|
| 2 |
+
en-AU,0.016,0.002,0.009
|
| 3 |
+
en-IN,0.016,0.031,0.046
|
| 4 |
+
en-UK,0.008,0.037,0.022
|
eval_outputs/matrices/OLMo-2-0425-1B_mean.csv
ADDED
|
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
train_var,en-AU,en-IN,en-UK
|
| 2 |
+
en-AU,0.756,0.51,0.584
|
| 3 |
+
en-IN,0.567,0.602,0.677
|
| 4 |
+
en-UK,0.56,0.583,0.713
|
eval_outputs/matrices/OLMo-2-0425-1B_std.csv
ADDED
|
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
train_var,en-AU,en-IN,en-UK
|
| 2 |
+
en-AU,0.01,0.007,0.006
|
| 3 |
+
en-IN,0.055,0.003,0.036
|
| 4 |
+
en-UK,0.027,0.009,0.02
|
eval_outputs/matrices/Qwen2.5-1.5B_mean.csv
ADDED
|
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
train_var,en-AU,en-IN,en-UK
|
| 2 |
+
en-AU,0.705,0.487,0.591
|
| 3 |
+
en-IN,0.501,0.608,0.606
|
| 4 |
+
en-UK,0.589,0.562,0.65
|
eval_outputs/matrices/Qwen2.5-1.5B_std.csv
ADDED
|
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
train_var,en-AU,en-IN,en-UK
|
| 2 |
+
en-AU,0.012,0.013,0.009
|
| 3 |
+
en-IN,0.014,0.023,0.071
|
| 4 |
+
en-UK,0.022,0.017,0.032
|
eval_outputs/matrices/SmolLM2-1.7B_mean.csv
ADDED
|
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
train_var,en-AU,en-IN,en-UK
|
| 2 |
+
en-AU,0.735,0.496,0.594
|
| 3 |
+
en-IN,0.565,0.599,0.646
|
| 4 |
+
en-UK,0.645,0.558,0.698
|
eval_outputs/matrices/SmolLM2-1.7B_std.csv
ADDED
|
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
train_var,en-AU,en-IN,en-UK
|
| 2 |
+
en-AU,0.008,0.006,0.004
|
| 3 |
+
en-IN,0.083,0.012,0.007
|
| 4 |
+
en-UK,0.029,0.007,0.009
|
headline_heatmaps.png
ADDED
|
Git LFS Details
|
main_q2_3.ipynb
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
q5_2_inference.py
ADDED
|
@@ -0,0 +1,37 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import torch
|
| 2 |
+
from transformers import AutoModelForSequenceClassification, AutoTokenizer
|
| 3 |
+
from peft import PeftModel
|
| 4 |
+
|
| 5 |
+
|
| 6 |
+
def load(base_id, adapter_repo):
    """Build the tokenizer and the adapter-wrapped sequence classifier.

    Args:
        base_id: Identifier handed to ``from_pretrained`` for both the
            tokenizer and the base two-label classification model.
        adapter_repo: Identifier of the PEFT adapter applied on top of the
            base model.

    Returns:
        A ``(tokenizer, model)`` tuple. The model is in eval mode and has
        been moved to CUDA when ``torch.cuda.is_available()`` is true.
    """
    tokenizer = AutoTokenizer.from_pretrained(base_id)
    # Tokenizers that ship without a pad token reuse EOS for padding.
    if tokenizer.pad_token is None:
        tokenizer.pad_token = tokenizer.eos_token

    # NOTE(review): the `dtype=` keyword is presumably only understood by
    # recent transformers releases (older ones expect `torch_dtype=`) --
    # confirm against the version the project actually pins.
    backbone = AutoModelForSequenceClassification.from_pretrained(
        base_id,
        num_labels=2,
        dtype=torch.bfloat16,
    )
    # Keep the model config's pad id in sync with the tokenizer's.
    backbone.config.pad_token_id = tokenizer.pad_token_id

    classifier = PeftModel.from_pretrained(backbone, adapter_repo)
    classifier.eval()
    if not torch.cuda.is_available():
        return tokenizer, classifier
    classifier.to('cuda')
    return tokenizer, classifier
|
| 19 |
+
|
| 20 |
+
|
| 21 |
+
@torch.no_grad()
def predict(tok, model, text, max_len=256):
    """Classify ``text`` and return the index of the highest-scoring label.

    Args:
        tok: Callable tokenizer; invoked with ``return_tensors='pt'``,
            truncation to ``max_len`` tokens and padding enabled.
        model: Module whose call result exposes a ``.logits`` tensor.
        text: The input to classify. The final ``int(...)`` conversion only
            succeeds for a single example, so pass one string, not a batch.
        max_len: Maximum tokenized length before truncation.

    Returns:
        The argmax over the last logits dimension as a plain ``int``.
        Presumably 0 = not sarcastic and 1 = sarcastic -- confirm against
        the label encoding used in training.
    """
    inputs = tok(text, return_tensors='pt', truncation=True, max_length=max_len, padding=True)
    # Follow the model's actual device instead of assuming 'cuda' whenever
    # CUDA exists: the model may still live on the CPU or on another GPU,
    # and mismatched devices make the forward pass fail.
    device = next(model.parameters()).device
    inputs = {k: v.to(device) for k, v in inputs.items()}
    logits = model(**inputs).logits
    return int(logits.argmax(dim=-1))
|
| 28 |
+
|
| 29 |
+
|
| 30 |
+
if __name__ == "__main__":
    # Demo run: load one base model plus adapter, classify a single
    # sentence and print the predicted label index.
    base_model_id = "Qwen/Qwen2.5-1.5B"
    adapter_id = "berkinksk/besstie-sarcasm-en-UK-Qwen2.5-1.5B-seed2024"
    tok, model = load(base_model_id, adapter_id)

    text = "Oh great, another rainy day in Manchester. Just what I wanted."
    print(predict(tok, model, text))
|
requirements.txt
ADDED
|
@@ -0,0 +1,5 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
torch>=2.1.0
|
| 2 |
+
transformers>=4.40.0
|
| 3 |
+
peft>=0.10.0
|
| 4 |
+
gradio>=4.20.0
|
| 5 |
+
accelerate>=0.28.0
|