Spaces:
Sleeping
Sleeping
| import gradio as gr | |
| import pandas as pd | |
| from pathlib import Path | |
# Global token storage: one row per token with the columns
# (sentence_id, token, label). It starts empty but already carries the schema,
# so code that reads it before any data is loaded still sees the expected
# columns instead of a column-less frame.
token_df = pd.DataFrame(columns=["sentence_id", "token", "label"])
# Generate generic sample sentences
def make_sample_data(n=100):
    """Build a one-column DataFrame of ``n`` templated example sentences.

    Each sentence has the form "<person> <verb> <organisation> in <place>.",
    where every slot is filled by cycling through a fixed pool with the row
    index.

    Args:
        n: Number of sentences to generate.

    Returns:
        A DataFrame with a single ``text`` column holding the sentences.
    """
    people = ["Alice", "Bob", "Charlie", "Diane", "Eve"]
    orgs = ["Acme Corp", "Globex", "Initech", "Umbrella", "Stark Inc"]
    locs = ["Paris", "New York", "London", "Tokyo", "Sydney"]
    verbs = ["visited", "joined", "founded", "traveled to", "met with"]

    def pick(pool, idx):
        # Wrap around so any index maps onto the pool.
        return pool[idx % len(pool)]

    sentences = [
        {
            "text": (
                f"{pick(people, idx)} {pick(verbs, idx)} "
                f"{pick(orgs, idx)} in {pick(locs, idx)}."
            )
        }
        for idx in range(n)
    ]
    return pd.DataFrame(sentences)
def load_data(file):
    """Load sentences from an uploaded CSV (or sample data) and tokenize them.

    The result is stored in the module-level ``token_df`` with one row per
    whitespace-separated token and every label initialised to ``"O"``.

    Args:
        file: Value of the upload widget. Either a filesystem path
            (``str`` / ``Path``) or a file wrapper exposing the path as
            ``.name``. A falsy value selects the built-in sample sentences.

    Returns:
        A 3-tuple matching the ``[table, status, actions]`` outputs: an update
        for the token table, a status message, and an update toggling the
        visibility of the action row.
    """
    global token_df

    # Load user CSV or fallback to sample
    if file:
        # A plain path is used as-is; a file wrapper carries the path in
        # `.name`. (Path is checked explicitly because Path.name is only the
        # basename, not the full path.)
        csv_path = file if isinstance(file, (str, Path)) else file.name
        try:
            df = pd.read_csv(csv_path)
        except (
            OSError,
            UnicodeDecodeError,
            pd.errors.EmptyDataError,
            pd.errors.ParserError,
        ) as err:
            # Report unreadable files through the status box instead of
            # letting the callback fail.
            return (
                gr.update(visible=False),
                f"❌ Could not read CSV: {err}",
                gr.update(visible=False),
            )
    else:
        df = make_sample_data(100)

    if "text" not in df.columns:
        return (
            gr.update(visible=False),
            "❌ CSV must contain a `text` column.",
            gr.update(visible=False),
        )

    # Tokenize into (sentence_id, token, label)
    records = []
    for sid, txt in enumerate(df["text"]):
        # Empty CSV cells arrive as NaN and numeric cells as numbers; skip the
        # former and stringify the latter so `.split()` cannot fail.
        if pd.isna(txt):
            continue
        records.extend(
            {"sentence_id": sid, "token": tok, "label": "O"}
            for tok in str(txt).split()
        )

    # Explicit columns keep the schema intact even when no tokens were found.
    token_df = pd.DataFrame(records, columns=["sentence_id", "token", "label"])
    return (
        gr.update(value=token_df, visible=True),
        f"✅ Loaded {len(df)} sentences → {len(token_df)} tokens.",
        gr.update(visible=True),
    )
def save_edits(table):
    """Replace the stored tokens with the contents of the edited table.

    Args:
        table: Tabular data from the annotation grid, interpreted with the
            columns ``sentence_id``, ``token`` and ``label``.

    Returns:
        A confirmation message for the status box.
    """
    global token_df
    schema = ["sentence_id", "token", "label"]
    edited = pd.DataFrame(data=table, columns=schema)
    token_df = edited
    return "💾 Edits saved."
def download_tokens():
    """Write the stored tokens to ``raw_tokens.csv`` and return its path.

    The file is written relative to the current working directory, without
    the DataFrame index.
    """
    target = Path("raw_tokens.csv")
    token_df.to_csv(target, index=False)
    return target
def download_iob():
    """Write the stored tokens plus an IOB tag column to ``ner_iob.csv``.

    A token labelled ``O`` -- or with a blank or missing label, e.g. a cleared
    cell -- is tagged ``O``. Any other label is tagged ``I-<label>`` when the
    most recently seen token of the same sentence carried the same label, and
    ``B-<label>`` otherwise. Surrounding whitespace in labels is ignored when
    building the tag; the ``label`` column itself is written unchanged.

    Returns:
        Path of the written CSV file (relative to the working directory).
    """
    # A frame without rows may also lack the columns, so avoid indexing it.
    if token_df.empty:
        pairs = []
    else:
        pairs = zip(token_df["sentence_id"], token_df["label"])

    # Convert to IOB
    iob = []
    last_label = {}  # sentence_id -> label of its latest token (None after "O")
    for sid, raw in pairs:
        # Missing cells come through as None/NaN; treat them like blank text.
        lbl = "" if pd.isna(raw) else str(raw).strip()
        if lbl in ("", "O"):
            iob.append("O")
            last_label[sid] = None
            continue
        prefix = "I-" if last_label.get(sid) == lbl else "B-"
        iob.append(prefix + lbl)
        last_label[sid] = lbl

    out = token_df.copy()
    out["iob"] = iob
    out.to_csv("ner_iob.csv", index=False)
    return Path("ner_iob.csv")
def _refresh_downloads():
    """Regenerate both export files and return their paths for the buttons."""
    return download_tokens(), download_iob()


# UI layout and event wiring for the annotation app.
with gr.Blocks() as app:
    gr.Markdown("# 🏷️ Label It! Mini-NER")
    gr.Markdown("**Step 1:** Upload a CSV with a `text` column, or leave blank for sample sentences.")
    with gr.Row():
        file_in = gr.File(label="📁 Upload CSV", file_types=[".csv"])
        load_btn = gr.Button("Load Data")
    status = gr.Textbox(label="Status", interactive=False)
    table = gr.Dataframe(
        headers=["sentence_id", "token", "label"],
        # `interactive` is the Dataframe switch for user editing; there is no
        # `editable` keyword.
        interactive=True,
        visible=False,
        label="📝 Annotate Tokens",
    )
    with gr.Row(visible=False) as actions:
        save_btn = gr.Button("💾 Save Edits")
        # A DownloadButton serves whatever file its `value` points at; it takes
        # no callback or file-name argument. The values are filled in by the
        # event chains below.
        dl_tokens = gr.DownloadButton(label="⬇️ Download Tokens CSV")
        dl_iob = gr.DownloadButton(label="⬇️ Download IOB CSV")

    # Bind events
    load_event = load_btn.click(
        load_data,
        inputs=file_in,
        outputs=[table, status, actions],
    )
    # Rebuild the export files once the tokens have been (re)loaded, so the
    # download buttons always point at current data.
    load_event.then(_refresh_downloads, outputs=[dl_tokens, dl_iob])

    save_event = save_btn.click(
        save_edits,
        inputs=table,
        outputs=status,
    )
    # Same after saving edits: the downloads must reflect the saved labels.
    save_event.then(_refresh_downloads, outputs=[dl_tokens, dl_iob])

    gr.Markdown("""
    **Step 2:**
    - Click into the **label** column and type one of:
    `PER`, `ORG`, `LOC`, or leave as `O`.
    - **Save Edits**, then download your token CSV or IOB‐tagged CSV.
    """)

app.launch()