Spaces:

vidore
/

vidore-leaderboard

Running

App Files Files Community

HugSib committed on Jun 27, 2024

Commit

3c64e23

verified ·

1 Parent(s): 1045c52

feat : added search + checkboxes + documentation

Browse files

Files changed (3) hide show

app.py +114 -46
model_handler.py +105 -0
utils.py +37 -0

app.py CHANGED Viewed

@@ -1,8 +1,9 @@
-from data.model_handler import ModelHandler
-from app.utils import add_rank_and_format, get_refresh_function
 import gradio as gr
-METRICS = ["ndcg_at_5", "recall_at_1", "recall_at_5", "mrr_at_5"]
 def main():
     model_handler = ModelHandler()
@@ -31,52 +32,119 @@ def main():
     .filter-checkbox-group {
         max-width: max-content;
     }
     """
     with gr.Blocks(css=css) as block:
-        gr.Markdown("# ViDoRe: The Visual Document Retrieval Benchmark 📚🔍")
-        gr.Markdown("## From the paper - ColPali: Efficient Document Retrieval with Vision Language Models 👀")
-        gr.Markdown(
-            """
-        Visual Document Retrieval Benchmark leaderboard. To submit, refer to the <a href="https://github.com/tonywu71/vidore-benchmark/" target="_blank" style="text-decoration: underline">ViDoRe GitHub repository</a>.  Refer to the [ColPali paper](https://arxiv.org/abs/XXXX.XXXXX) for details on metrics, tasks and models.
-        """
-        )
-        #all_columns = list(data.columns)
-        #default_columns = all_columns
-        with gr.Row():
-            metric_dropdown = gr.Dropdown(choices=METRICS, value=initial_metric, label="Select Metric")
-            #column_checkboxes = gr.CheckboxGroup(choices=all_columns, value=default_columns, label="Select Columns to Display")
-        with gr.Row():
-            datatype = ["number", "markdown"] + ["number"] * (NUM_DATASETS + 1)
-            dataframe = gr.Dataframe(data, datatype=datatype, type="pandas")
-        with gr.Row():
-            refresh_button = gr.Button("Refresh")
-            refresh_button.click(get_refresh_function(), inputs=[metric_dropdown], outputs=dataframe, concurrency_limit=20)
-        # Automatically refresh the dataframe when the dropdown value changes
-        metric_dropdown.change(get_refresh_function(), inputs=[metric_dropdown], outputs=dataframe)
-        #column_checkboxes.change(get_refresh_function(), inputs=[metric_dropdown, column_checkboxes], outputs=dataframe)
-        gr.Markdown(
-            f"""
-        - **Total Datasets**: {NUM_DATASETS}
-        - **Total Scores**: {NUM_SCORES}
-        - **Total Models**: {NUM_MODELS}
-        """
-            + r"""
-        Please consider citing:
-        ```bibtex
-        INSERT LATER
-        ```
-        """
-        )
     block.queue(max_size=10).launch(debug=True)

 import gradio as gr
+from app.utils import add_rank_and_format, filter_models, get_refresh_function
+from data.model_handler import ModelHandler
+METRICS = ["ndcg_at_5", "recall_at_1"]
 def main():
     model_handler = ModelHandler()
     .filter-checkbox-group {
         max-width: max-content;
     }
+    #markdown size
+    .markdown {
+        font-size: 1rem;
+    }
     """
     with gr.Blocks(css=css) as block:
+        with gr.Tabs():
+            with gr.TabItem("🏆 Leaderboard"):
+                gr.Markdown("# ViDoRe: The Visual Document Retrieval Benchmark 📚🔍")
+                gr.Markdown("## From the paper - ColPali: Efficient Document Retrieval with Vision Language Models 👀")
+                gr.Markdown(
+                    """
+                Visual Document Retrieval Benchmark leaderboard. To submit, refer to the corresponding tab.
+                Refer to the [ColPali paper](https://arxiv.org/abs/XXXX.XXXXX) for details on metrics, tasks and models.
+                """
+                )
+                datasets_columns = list(data.columns[3:])
+                anchor_columns = list(data.columns[:3])
+                default_columns = anchor_columns + datasets_columns
+                with gr.Row():
+                    metric_dropdown = gr.Dropdown(choices=METRICS, value=initial_metric, label="Select Metric")
+                    research_textbox = gr.Textbox(placeholder="🔍 Search Models... [press enter]", label="Filter Models by Name", )
+                    column_checkboxes = gr.CheckboxGroup(choices=datasets_columns, value=default_columns, label="Select Columns to Display")
+                with gr.Row():
+                    datatype = ["number", "markdown"] + ["number"] * (NUM_DATASETS + 1)
+                    dataframe = gr.Dataframe(data, datatype=datatype, type="pandas")
+                def update_data(metric, search_term, selected_columns):
+                    data = model_handler.get_vidore_data(metric)
+                    data = add_rank_and_format(data)
+                    data = filter_models(data, search_term)
+                    if selected_columns:
+                        selected_columns = selected_columns
+                        data = data[selected_columns]
+                    return data
+                with gr.Row():
+                    refresh_button = gr.Button("Refresh")
+                    refresh_button.click(get_refresh_function(), inputs=[metric_dropdown], outputs=dataframe, concurrency_limit=20)
+                # Automatically refresh the dataframe when the dropdown value changes
+                metric_dropdown.change(get_refresh_function(), inputs=[metric_dropdown], outputs=dataframe)
+                research_textbox.submit(
+                    lambda metric, search_term, selected_columns: update_data(metric, search_term, selected_columns),
+                    inputs=[metric_dropdown, research_textbox, column_checkboxes],
+                    outputs=dataframe
+                )
+                column_checkboxes.change(
+                    lambda metric, search_term, selected_columns: update_data(metric, search_term, selected_columns),
+                    inputs=[metric_dropdown, research_textbox, column_checkboxes],
+                    outputs=dataframe
+                )
+                #column_checkboxes.change(get_refresh_function(), inputs=[metric_dropdown, column_checkboxes], outputs=dataframe)
+                gr.Markdown(
+                    f"""
+                - **Total Datasets**: {NUM_DATASETS}
+                - **Total Scores**: {NUM_SCORES}
+                - **Total Models**: {NUM_MODELS}
+                """
+                    + r"""
+                Please consider citing:
+                ```bibtex
+                INSERT LATER
+                ```
+                """
+                )
+            with gr.TabItem("📚 Submit your model"):
+                gr.Markdown("# How to Submit a New Model to the Leaderboard")
+                gr.Markdown(
+                    """
+                    To submit a new model to the ViDoRe leaderboard, follow these steps:
+                    1. **Evaluate your model**:
+                       - You can either follow the evaluation script provided in the [ViDoRe GitHub repository](https://github.com/tonywu71/vidore-benchmark/)
+                       - Use your own evaluation script.
+                    2. **Format your submission file**:
+                        - The submission file should be named `results.json`, and therefore in JSON format.
+                        - It should have the following structure:
+                        ```json
+                        {
+                            "dataset_name_1": {
+                                "metric_1": score_1,
+                                "metric_2": score_2,
+                                ...
+                            },
+                            "dataset_name_2": {
+                                "metric_1": score_1,
+                                "metric_2": score_2,
+                                ...
+                            },
+                        }
+                        ```
+                        - The dataset names should be the same as viDoRe dataset names listed in the following collection: [ViDoRe Benchmark](https://huggingface.co/collections/vidore/vidore-benchmark-667173f98e70a1c0fa4db00d).
+                    3. **Submit your model**:
+                        - Create a huggingface model repository with your model and the submission file.
+                        - Add the tag 'vidore' to your model.
+                    And you're done ! Your model will appear on the leaderboard once it is approved by the ViDoRe team.
+                    """
+                )
     block.queue(max_size=10).launch(debug=True)

model_handler.py ADDED Viewed

	@@ -0,0 +1,105 @@

+import json
+import os
+from typing import Dict
+from huggingface_hub import HfApi, hf_hub_download, metadata_load
+import pandas as pd
+from .dataset_handler import get_datasets_nickname, VIDORE_DATASETS_KEYWORDS
class ModelHandler:
    """Collect benchmark results from Hub model repositories and rank them.

    Results are read from the repositories returned by
    ``HfApi.list_models(filter="vidore")`` and kept in ``self.model_infos``,
    a dict mapping a model name to ``{"meta": ..., "results": ...}``.
    """

    def __init__(self, model_infos_path="model_infos.json"):
        """Create the Hub client and load any cached model infos.

        Args:
            model_infos_path: Path of the JSON file used as a local cache.
        """
        self.api = HfApi()
        self.model_infos_path = model_infos_path
        self.model_infos = self._load_model_infos()

    def _load_model_infos(self) -> Dict:
        """Return the cached model infos, or an empty dict if no cache file exists."""
        if os.path.exists(self.model_infos_path):
            with open(self.model_infos_path, encoding="utf-8") as f:
                return json.load(f)
        return {}

    def _save_model_infos(self):
        """Write ``self.model_infos`` to the cache file as JSON."""
        with open(self.model_infos_path, "w", encoding="utf-8") as f:
            json.dump(self.model_infos, f)

    def _fetch_new_model_infos(self):
        """Download results for every listed model not yet in ``self.model_infos``.

        A repository contributes one entry per ``*_metrics.json`` file (named
        after the file prefix) and one for ``results.json`` (named after the
        repo id with ``/`` replaced by ``_``). A model whose files cannot be
        downloaded or parsed is reported on stdout and skipped.
        """
        models = self.api.list_models(filter="vidore")
        repositories = [model.modelId for model in models]  # type: ignore

        for repo_id in repositories:
            result_files = [
                f
                for f in self.api.list_repo_files(repo_id)
                if f.endswith("_metrics.json") or f == "results.json"
            ]
            for file in result_files:
                if file == "results.json":
                    model_name = repo_id.replace("/", "_")
                else:
                    model_name = file.split("_metrics.json")[0]

                if model_name in self.model_infos:
                    continue

                # Best effort: the README download and metadata parsing live
                # inside the try as well, so a single broken repository cannot
                # abort the refresh for all the other models.
                try:
                    readme_path = hf_hub_download(repo_id, filename="README.md")
                    meta = metadata_load(readme_path)
                    result_path = hf_hub_download(repo_id, filename=file)
                    with open(result_path, encoding="utf-8") as f:
                        results = json.load(f)
                    # Expected shape: {dataset_name: {metric_name: score}}.
                    if not isinstance(results, dict) or not all(
                        isinstance(scores, dict) for scores in results.values()
                    ):
                        raise ValueError("results file is not a dict of per-dataset dicts")
                except Exception as e:
                    print(f"Error loading {model_name} - {e}")
                    continue

                self.model_infos[model_name] = {"meta": meta, "results": results}

    def get_vidore_data(self, metric="ndcg_at_5"):
        """Build a models x datasets table of scores for ``metric``.

        Args:
            metric: Key looked up in each dataset's score dict.

        Returns:
            A DataFrame indexed by model name with one column per dataset
            nickname, or an empty DataFrame when no model infos are available.
            Datasets whose name matches none of ``VIDORE_DATASETS_KEYWORDS``,
            or that do not report ``metric``, are left out for that model.
        """
        self._fetch_new_model_infos()

        # NOTE(review): persisting the cache is disabled here - confirm whether
        # that is intentional before re-enabling it.
        #self._save_model_infos()

        if not self.model_infos:
            return pd.DataFrame()

        model_res = {}
        for model, info in self.model_infos.items():
            dataset_res = {}
            for dataset, scores in info["results"].items():
                # Keep only datasets whose name contains a known keyword.
                if not any(keyword in dataset for keyword in VIDORE_DATASETS_KEYWORDS):
                    print(f"{dataset} not found in ViDoRe datasets. Skipping ...")
                    continue
                # A results file is not guaranteed to report every metric; skip
                # the missing score rather than failing the whole table.
                if metric not in scores:
                    print(f"{metric} not found for {dataset} in {model}. Skipping ...")
                    continue
                dataset_res[get_datasets_nickname(dataset)] = scores[metric]
            model_res[model] = dataset_res

        # One column per model, transposed so that each model becomes a row.
        return pd.DataFrame(model_res).T

    @staticmethod
    def add_rank(df):
        """Rank the rows of ``df`` and convert scores to percentages.

        ``df`` is modified in place and also returned. Missing values are
        replaced by 0.0. Every column except a fixed list of descriptive
        column names is treated as a score column. With exactly one score
        column the rows are sorted by it; otherwise an "Average" column holding
        the row-wise mean of the score columns is inserted and used for
        sorting. A 1-based "Rank" column is then prepended, and every float64
        column is multiplied by 100 and rounded to one decimal place.

        Args:
            df: Table with one row per model.

        Returns:
            The same DataFrame object, sorted in descending order.
        """
        df.fillna(0.0, inplace=True)
        excluded = {
            "Model",
            "Model Size (Million Parameters)",
            "Memory Usage (GB, fp32)",
            "Embedding Dimensions",
            "Max Tokens",
        }
        cols_to_rank = [col for col in df.columns if col not in excluded]

        if len(cols_to_rank) == 1:
            df.sort_values(cols_to_rank[0], ascending=False, inplace=True)
        else:
            # Insert position = number of non-score columns; this puts
            # "Average" right before the scores, presumably assuming the score
            # columns come last - verify against callers.
            df.insert(
                len(df.columns) - len(cols_to_rank),
                "Average",
                df[cols_to_rank].mean(axis=1, skipna=False),
            )
            df.sort_values("Average", ascending=False, inplace=True)

        df.insert(0, "Rank", list(range(1, len(df) + 1)))

        # Multiply values by 100 if they are floats and round to 1 decimal place.
        for col in df.columns:
            if df[col].dtype == "float64":
                df[col] = df[col].apply(lambda x: round(x * 100, 1))
        return df

utils.py ADDED Viewed

	@@ -0,0 +1,37 @@

+from data.model_handler import ModelHandler
def make_clickable_model(model_name, link=None):
    """Return an HTML anchor that displays ``model_name`` and opens in a new tab.

    When ``link`` is not given, the target is built by turning every ``_`` in
    ``model_name`` into ``/``, removing any ``/captioning`` and ``/ocr``
    segments, and prefixing the result with ``https://huggingface.co/``.
    """
    if link is None:
        repo_path = model_name.replace("_", "/")
        # These suffixes are stripped from the link target only; the visible
        # label keeps the full model name.
        for suffix in ("/captioning", "/ocr"):
            repo_path = repo_path.replace(suffix, "")
        link = "https://huggingface.co/" + repo_path
    return f'<a target="_blank" style="text-decoration: underline" href="{link}">{model_name}</a>'
def add_rank_and_format(df):
    """Turn a score table indexed by model name into the displayed table.

    The index is moved into a "Model" column, the rows are ranked with
    ``ModelHandler.add_rank``, and each model name is replaced by the HTML
    link produced by ``make_clickable_model``.
    """
    with_model_column = df.reset_index().rename(columns={"index": "Model"})
    ranked = ModelHandler.add_rank(with_model_column)
    ranked["Model"] = ranked["Model"].apply(make_clickable_model)
    return ranked
def get_refresh_function():
    """Return a callback that rebuilds the ranked table for a given metric.

    Each call of the returned function creates a new ``ModelHandler``, fetches
    the scores for ``metric`` and passes them through ``add_rank_and_format``.
    """

    def _refresh(metric):
        scores = ModelHandler().get_vidore_data(metric)
        return add_rank_and_format(scores)

    return _refresh
def filter_models(data, search_term):
    """Keep only the rows whose "Model" value contains ``search_term``.

    Args:
        data: DataFrame with a "Model" column of strings.
        search_term: Text to look for. An empty string or ``None`` disables
            filtering and returns ``data`` unchanged.

    Returns:
        The rows of ``data`` whose "Model" value contains ``search_term`` as a
        case-insensitive literal substring. Rows with a missing "Model" value
        never match.
    """
    if not search_term:
        return data
    # regex=False: the term is matched literally, so input such as "(" or
    # "c++" neither raises a regex error nor acts as a pattern.
    matches = data["Model"].str.contains(search_term, case=False, na=False, regex=False)
    return data[matches]