import gradio as gr
import pandas as pd
from gradio_leaderboard import Leaderboard, SelectColumns, ColumnFilter

from data_reviewer import create_data_viewer

# Define constants and enums
TITLE = "<h1>VL-RewardBench Leaderboard</h1>"
INTRODUCTION_TEXT = "https://vl-rewardbench.github.io/"
GOOGLE_SHEET_URL = (
    "https://docs.google.com/spreadsheets/d/1fPqZLF1FQFyy4n9I6GNk7MeDSGlJDVVes9yEBqN8RwU/export?gid=0&format=csv"
)
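# Note: the "export?gid=0&format=csv" endpoint serves the first worksheet (gid=0) of
# the sheet as CSV, so leaderboard entries can be updated by editing the spreadsheet
# without redeploying the Space.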
ABOUT_TEXT = """Welcome to VL-RewardBench!

We introduce VL-RewardBench, a novel benchmark designed to expose the limitations of vision-language reward models across visual perception, hallucination detection, and reasoning tasks.
Our evaluation reveals, among other findings, that models primarily fail at basic visual perception rather than reasoning, and that performance on our benchmark correlates strongly (r > 0.9) with downstream vision-language tasks.

The splits are:
- General (VLFeedback + WildVision)
- Hallucination (POVID, RLAIF, RLHF-V)
- Reasoning (MMMU-Pro, MathVerse)"""
class AutoEvalColumn:
    model = {"name": "Model", "type": "markdown", "displayed_by_default": True, "never_hidden": True}
    type = {"name": "Type", "type": "str", "displayed_by_default": True, "never_hidden": False}
    general = {"name": "General", "type": "float", "displayed_by_default": True, "never_hidden": False}
    hallucination = {"name": "Hallucination", "type": "float", "displayed_by_default": True, "never_hidden": False}
    reasoning = {"name": "Reasoning", "type": "float", "displayed_by_default": True, "never_hidden": False}
    overall = {"name": "Overall Consistency", "type": "float", "displayed_by_default": True, "never_hidden": False}
    macro = {"name": "Macro Average", "type": "float", "displayed_by_default": True, "never_hidden": False}
    model_size = {"name": "Model Size", "type": "str", "displayed_by_default": False, "never_hidden": False}
    opensource = {"name": "Open Source?", "type": "str", "displayed_by_default": False, "never_hidden": False}
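# AutoEvalColumn acts as a lightweight column registry: init_leaderboard() below
# introspects the class __dict__ and keeps only the dict-valued attributes, so
# adding a column here is enough to surface it in the leaderboard UI.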
def get_result_data():
    return pd.read_csv(GOOGLE_SHEET_URL)
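# Optional, illustrative variant (not part of the original app): pandas fetches the
# sheet over HTTPS at startup, so a transient network error would crash the Space.
# A guarded fetch with an empty-DataFrame fallback is one possible hedge; the
# fallback behaviour here is an assumption, not the Space's actual logic.
def get_result_data_safe():
    try:
        return pd.read_csv(GOOGLE_SHEET_URL)
    except Exception:
        return pd.DataFrame()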
def init_leaderboard(dataframe):
    if dataframe is None or dataframe.empty:
        raise ValueError("Leaderboard DataFrame is empty or None.")
    return Leaderboard(
        value=dataframe,
        datatype=[col["type"] for col in AutoEvalColumn.__dict__.values() if isinstance(col, dict)],
        select_columns=SelectColumns(
            default_selection=[
                col["name"]
                for col in AutoEvalColumn.__dict__.values()
                if isinstance(col, dict) and col["displayed_by_default"]
            ],
            cant_deselect=[
                col["name"]
                for col in AutoEvalColumn.__dict__.values()
                if isinstance(col, dict) and col.get("never_hidden", False)
            ],
            label="Select Columns to Display:",
        ),
        search_columns=["Model"],
        filter_columns=[
            ColumnFilter("Open Source?", type="checkboxgroup", label="Open Source?"),
            ColumnFilter("Model Size", type="checkboxgroup", label="Model Size"),
            ColumnFilter("Type", type="checkboxgroup", label="Type"),
        ],
        interactive=False,
    )
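# The ColumnFilter names above ("Open Source?", "Model Size", "Type") must match the
# header row of the Google Sheet exactly, since the filters operate on the
# DataFrame's column labels.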
def format_model_link(row):
    """Format model name as HTML link if URL is available."""
    model_name = row['Model']
    url = row.get('URL', '')
    if pd.notna(url) and url.strip():
        return f'<a href="{url}" target="_blank">{model_name}</a>'
    return model_name
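# Example (illustrative, hypothetical values): a row such as
# {"Model": "SomeModel", "URL": "https://example.com"} renders as
# '<a href="https://example.com" target="_blank">SomeModel</a>'; rows without a URL
# fall back to the plain model name.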
# Initialize the Gradio interface
demo = gr.Blocks()

with demo:
    gr.HTML(TITLE)
    gr.Markdown(INTRODUCTION_TEXT)

    with gr.Tabs() as tabs:
        with gr.TabItem("🏅 Leaderboard"):
            # Load the results DataFrame from the Google Sheet
            df = get_result_data()
            df["Model"] = df.apply(format_model_link, axis=1)
            del df["URL"]
            df = df.sort_values('Overall Consistency', ascending=False)
            leaderboard = init_leaderboard(df)
        with gr.TabItem("📊 Data Viewer"):
            dataset_split, sample_idx = create_data_viewer()
        with gr.TabItem("ℹ️ About"):
            gr.Markdown(ABOUT_TEXT)

demo.launch()
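# When running locally rather than on a Hugging Face Space, demo.launch(share=True)
# can optionally be used to expose a temporary public link (share is a standard
# Gradio launch argument).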