Spaces:

wangrongsheng
/

RS-Medical-LLM-Leaderboard

Sleeping

App Files Files Community

wangrongsheng committed on Jul 14

Commit

5e28d59

verified ·

1 Parent(s): 009e4b1

init

Browse files

Files changed (4) hide show

app.py +920 -0
metadata/medical_data.json +16 -0
metadata/medical_mm_data.json +310 -0
requirements.txt +3 -0

app.py ADDED Viewed

	@@ -0,0 +1,920 @@

+import gradio as gr
+import pandas as pd
+import numpy as np
+from datetime import datetime, timedelta
+import random
+import json
+import os
+# Module-level set meant to hold pinned rows (not referenced anywhere else in the visible source)
+pinned_rows_global = set()
+# Load the medical LLM leaderboard data from a JSON file
+def generate_llm_data():
+    """Load the medical LLM leaderboard from metadata/medical_data.json.
+    Returns:
+        pandas.DataFrame: one row per model, ordered by '平均分' descending
+        with rows lacking a '平均分' placed last, nulls replaced by "-", a
+        1-based '排名' column added, and columns ordered as 排名, 模型名称,
+        平均分, then everything else. An empty DataFrame is returned when the
+        file is missing or contains no records.
+    """
+    json_path = "metadata/medical_data.json"
+    # Only the file read can raise FileNotFoundError, so keep the try minimal.
+    try:
+        with open(json_path, 'r', encoding='utf-8') as f:
+            data = json.load(f)
+    except FileNotFoundError:
+        print(f"警告: 找不到文件 {json_path}，使用默认数据")
+        return pd.DataFrame()
+    df = pd.DataFrame(data)
+    # An empty JSON array produces a frame without columns; indexing '平均分'
+    # below would raise KeyError, so return the empty frame directly.
+    if df.empty:
+        return df
+    # Nulls are rendered as "-" everywhere downstream.
+    df = df.fillna("-")
+    # Link URLs are kept raw here; they are turned into anchors at render time.
+    has_score = df['平均分'] != "-"
+    df_with_score = df[has_score].copy()
+    df_without_score = df[~has_score].copy()
+    # Rank only the scored rows; unscored rows keep file order at the bottom.
+    if not df_with_score.empty:
+        df_with_score = df_with_score.sort_values('平均分', ascending=False)
+    df_sorted = pd.concat([df_with_score, df_without_score], ignore_index=True)
+    df_sorted['排名'] = range(1, len(df_sorted) + 1)
+    # Put rank, model name and average score first, then the remaining columns.
+    leading_columns = ['排名', '模型名称', '平均分']
+    other_columns = [col for col in df_sorted.columns if col not in leading_columns]
+    return df_sorted[leading_columns + other_columns]
+# Load the medical multimodal leaderboard data from a JSON file
+def generate_multimodal_data():
+    """Load the medical multimodal leaderboard from metadata/medical_mm_data.json.
+    Returns:
+        pandas.DataFrame: one row per model whose '类型' is not '研究',
+        ordered by '平均分' descending with rows lacking a '平均分' placed
+        last, nulls replaced by "-", a 1-based '排名' column added, and
+        columns ordered as 排名, 模型名称, 平均分, then everything else. An
+        empty DataFrame is returned when the file is missing or contains no
+        records.
+    """
+    json_path = "metadata/medical_mm_data.json"
+    # Only the file read can raise FileNotFoundError, so keep the try minimal.
+    try:
+        with open(json_path, 'r', encoding='utf-8') as f:
+            data = json.load(f)
+    except FileNotFoundError:
+        print(f"警告: 找不到文件 {json_path}，使用默认数据")
+        return pd.DataFrame()
+    df = pd.DataFrame(data)
+    # An empty JSON array produces a frame without columns; indexing '类型'
+    # or '平均分' below would raise KeyError, so return the empty frame directly.
+    if df.empty:
+        return df
+    # Nulls are rendered as "-" everywhere downstream.
+    df = df.fillna("-")
+    # Entries of type '研究' are excluded from the leaderboard.
+    df = df[df['类型'] != '研究']
+    # Link URLs are kept raw here; they are turned into anchors at render time.
+    has_score = df['平均分'] != "-"
+    df_with_score = df[has_score].copy()
+    df_without_score = df[~has_score].copy()
+    # Rank only the scored rows; unscored rows keep file order at the bottom.
+    if not df_with_score.empty:
+        df_with_score = df_with_score.sort_values('平均分', ascending=False)
+    df_sorted = pd.concat([df_with_score, df_without_score], ignore_index=True)
+    df_sorted['排名'] = range(1, len(df_sorted) + 1)
+    # Put rank, model name and average score first, then the remaining columns.
+    leading_columns = ['排名', '模型名称', '平均分']
+    other_columns = [col for col in df_sorted.columns if col not in leading_columns]
+    return df_sorted[leading_columns + other_columns]
+def get_llm_leaderboard():
+    """Return the medical LLM leaderboard DataFrame built by generate_llm_data()."""
+    leaderboard = generate_llm_data()
+    return leaderboard
+def generate_llm_html_table(df=None, sort_column=None, sort_order="desc", pinned_rows=None):
+    """Render the medical LLM leaderboard as an HTML table string.
+    Args:
+        df: leaderboard DataFrame; loaded with get_llm_leaderboard() when None.
+        sort_column: column to sort by; ignored when falsy or absent from df.
+        sort_order: "asc" or "desc". For '排名', "desc" lists the highest
+            '平均分' first (rank 1, 2, 3...) and anything else reverses that.
+        pinned_rows: collection of '排名' values to show first; None means none.
+    Returns:
+        str: the table markup, or "<p>暂无数据</p>" when df is empty.
+    """
+    # Only `escape` is imported because `html` is the output buffer name below.
+    from html import escape
+    if df is None:
+        df = get_llm_leaderboard()
+    if df.empty:
+        return "<p>暂无数据</p>"
+    if pinned_rows is None:
+        pinned_rows = set()
+    numeric_columns = ['平均分', 'MMMU-Med', 'VQA-RAD', 'SLAKE', 'PathVQA', 'PMC-VQA', 'OMVQA', 'MedXQA']
+    if sort_column and sort_column in df.columns:
+        if sort_column == '排名':
+            # Rank is derived from the average score, so sort on the numeric
+            # score. The literal here used to be mojibake, so this branch never
+            # ran and "desc" fell through to a plain descending sort of the
+            # rank numbers (worst model first).
+            df_for_sort = df.copy()
+            df_for_sort['平均分_numeric'] = pd.to_numeric(df_for_sort['平均分'], errors='coerce')
+            sorted_indices = df_for_sort.sort_values(
+                '平均分_numeric', ascending=(sort_order != "desc"), na_position='last'
+            ).index
+            df = df.loc[sorted_indices].reset_index(drop=True)
+        elif sort_column in numeric_columns:
+            # Score columns may hold "-" placeholders; coerce them to NaN so
+            # they sort last instead of breaking the comparison.
+            df_for_sort = df.copy()
+            numeric_key = sort_column + '_numeric'
+            df_for_sort[numeric_key] = pd.to_numeric(df_for_sort[sort_column], errors='coerce')
+            sorted_indices = df_for_sort.sort_values(
+                numeric_key, ascending=(sort_order == "asc"), na_position='last'
+            ).index
+            df = df.loc[sorted_indices].reset_index(drop=True)
+        else:
+            # Text columns sort lexicographically.
+            df = df.sort_values(
+                sort_column, ascending=(sort_order == "asc"), na_position='last'
+            ).reset_index(drop=True)
+    # Pinning is keyed on the rank number, not on the row position.
+    if pinned_rows:
+        pinned_mask = df['排名'].isin(pinned_rows)
+        pinned_df = df[pinned_mask].copy()
+        unpinned_df = df[~pinned_mask].copy()
+        if not pinned_df.empty:
+            # Pinned rows go first in rank order; the rest keep the chosen sort.
+            pinned_df = pinned_df.sort_values('排名', ascending=True)
+            display_df = pd.concat([pinned_df, unpinned_df], ignore_index=True)
+        else:
+            display_df = unpinned_df
+    else:
+        display_df = df.reset_index(drop=True)
+    html = """
+    <div style="overflow-x: auto; width: 100%;">
+        <style>
+            .sort-btn {
+                background: none;
+                border: none;
+                cursor: pointer;
+                margin-left: 5px;
+                font-size: 12px;
+                color: #007bff;
+                padding: 2px 4px;
+                border-radius: 3px;
+            }
+            .sort-btn:hover {
+                background-color: #e9ecef;
+            }
+            .sort-btn:active {
+                background-color: #dee2e6;
+            }
+            .pin-btn {
+                background: none;
+                border: none;
+                cursor: pointer;
+                font-size: 16px;
+                padding: 2px 4px;
+                margin-right: 8px;
+                border-radius: 3px;
+            }
+            .pin-btn:hover {
+                background-color: #e9ecef;
+            }
+            .pinned-row {
+                background-color: #e3f2fd !important;
+                border-left: 4px solid #2196f3 !important;
+            }
+        </style>
+        <table style="width: 100%; border-collapse: collapse; margin: 20px 0; font-size: 14px;">
+            <thead>
+                <tr style="background-color: #f8f9fa; border-bottom: 2px solid #dee2e6;">
+    """
+    # Header: the pin indicator column followed by one header per data column.
+    html += '<th style="padding: 12px 8px; text-align: center; border: 1px solid #dee2e6; font-weight: bold; width: 50px;">📌</th>'
+    for col in display_df.columns:
+        html += f'''
+            <th style="padding: 12px 8px; text-align: left; border: 1px solid #dee2e6; font-weight: bold;">
+                {escape(str(col))}
+            </th>
+        '''
+    html += """
+                </tr>
+            </thead>
+            <tbody>
+    """
+    for idx, row in display_df.iterrows():
+        row_rank = row['排名']
+        is_pinned = row_rank in pinned_rows
+        # Style precedence: pinned, then top three, then zebra striping.
+        if is_pinned:
+            row_style = "background-color: #e3f2fd; border-left: 4px solid #2196f3;"
+        elif row_rank <= 3:
+            row_style = "background-color: #fff3cd;"
+        elif idx % 2 == 0:
+            row_style = "background-color: #f8f9fa;"
+        else:
+            row_style = ""
+        html += f'<tr style="{row_style}">'
+        pin_icon = "📌" if is_pinned else "📍"
+        html += f'''
+            <td style="padding: 10px 8px; border: 1px solid #dee2e6; text-align: center;">
+                <span title="排名: {row_rank}">
+                    {pin_icon}
+                </span>
+            </td>
+        '''
+        for col in display_df.columns:
+            cell_value = row[col]
+            cell_style = "padding: 10px 8px; border: 1px solid #dee2e6; text-align: left;"
+            if col == "使用链接" and cell_value != "-" and pd.notna(cell_value):
+                # Strip stray whitespace and escape so the URL cannot break
+                # out of the href attribute.
+                href = escape(str(cell_value).strip(), quote=True)
+                cell_content = f'<a href="{href}" target="_blank" style="color: #007bff; text-decoration: none;">尝试使用</a>'
+            elif col == "平均分" and cell_value != "-":
+                cell_style += " font-weight: bold; color: #28a745;"
+                cell_content = escape(str(cell_value))
+            else:
+                cell_content = escape(str(cell_value)) if pd.notna(cell_value) else "-"
+            html += f'<td style="{cell_style}">{cell_content}</td>'
+        html += '</tr>'
+    html += """
+            </tbody>
+        </table>
+        <script>
+            // 简化的脚本，只保留必要功能
+            console.log('医疗大语言模型排行榜已加载');
+        </script>
+    </div>
+    """
+    return html
+def get_multimodal_leaderboard():
+    """Return the medical multimodal leaderboard DataFrame built by generate_multimodal_data()."""
+    leaderboard = generate_multimodal_data()
+    return leaderboard
+def filter_llm_leaderboard(type_filter, min_score):
+    """Return the medical LLM leaderboard restricted by model type and minimum average score.
+    A type_filter of "全部" keeps every type. When min_score is above zero,
+    rows without a '平均分' are dropped along with rows scoring below it.
+    The result is ordered by the original '排名' with a fresh index.
+    """
+    board = get_llm_leaderboard()
+    if board.empty:
+        return board
+    if type_filter != "全部":
+        board = board[board["类型"] == type_filter]
+    if min_score > 0:
+        # Unscored rows ("-") cannot satisfy a minimum score, so drop them first.
+        scored = board[board["平均分"] != "-"].copy()
+        board = scored if scored.empty else scored[scored["平均分"] >= min_score]
+    if board.empty:
+        return board
+    # Keep the rank order that was assigned from the average score.
+    return board.sort_values("排名", ascending=True).reset_index(drop=True)
+def filter_multimodal_leaderboard(type_filter, min_score):
+    """Return the medical multimodal leaderboard restricted by model type and minimum average score.
+    A type_filter of "全部" keeps every type. When min_score is above zero,
+    rows without a '平均分' are dropped along with rows scoring below it.
+    The result is ordered by the original '排名' with a fresh index.
+    """
+    board = get_multimodal_leaderboard()
+    if board.empty:
+        return board
+    if type_filter != "全部":
+        board = board[board["类型"] == type_filter]
+    if min_score > 0:
+        # Unscored rows ("-") cannot satisfy a minimum score, so drop them first.
+        scored = board[board["平均分"] != "-"].copy()
+        board = scored if scored.empty else scored[scored["平均分"] >= min_score]
+    if board.empty:
+        return board
+    # Keep the rank order that was assigned from the average score.
+    return board.sort_values("排名", ascending=True).reset_index(drop=True)
+def generate_multimodal_html_table(df=None, sort_column=None, sort_order="desc", pinned_rows=None):
+    """Render the medical multimodal leaderboard as an HTML table string.
+    Args:
+        df: leaderboard DataFrame; loaded with get_multimodal_leaderboard() when None.
+        sort_column: column to sort by; ignored when falsy or absent from df.
+        sort_order: "asc" or "desc".
+        pinned_rows: collection of '排名' values to show first; None means none.
+    Returns:
+        The table markup, or "<p>暂无数据</p>" when df is empty.
+    """
+    if df is None:
+        df = get_multimodal_leaderboard()
+    if df.empty:
+        return "<p>暂无数据</p>"
+    if pinned_rows is None:
+        pinned_rows = set()
+    # Apply the requested sort, if any
+    if sort_column and sort_column in df.columns:
+        # Rank column: sort by the average score rather than by the rank number
+        if sort_column == '排名':
+            # Sorting by average score yields the rank ordering
+            df_for_sort = df.copy()
+            df_for_sort['平均分_numeric'] = pd.to_numeric(df_for_sort['平均分'], errors='coerce')
+            # "desc" means highest average first (rank 1, 2, 3...); "asc" means lowest average first (reverse rank)
+            if sort_order == "desc":
+                # Descending: highest average first, i.e. rank 1, 2, 3...
+                sorted_indices = df_for_sort.sort_values('平均分_numeric', ascending=False, na_position='last').index
+            else:
+                # Ascending: lowest average first, i.e. reverse rank order
+                sorted_indices = df_for_sort.sort_values('平均分_numeric', ascending=True, na_position='last').index
+            df = df.loc[sorted_indices].reset_index(drop=True)
+        # Other numeric columns
+        elif sort_column in ['平均分', 'MMMU-Med', 'VQA-RAD', 'SLAKE', 'PathVQA', 'PMC-VQA', 'OMVQA', 'MedXQA']:
+            # Keep the original data untouched
+            original_data = df.copy()
+            # Build a numeric helper column to sort on ("-" placeholders become NaN)
+            df_for_sort = df.copy()
+            df_for_sort[sort_column + '_numeric'] = pd.to_numeric(df_for_sort[sort_column], errors='coerce')
+            # Sort on the numeric helper column
+            if sort_order == "asc":
+                sorted_indices = df_for_sort.sort_values(sort_column + '_numeric', ascending=True, na_position='last').index
+            else:
+                sorted_indices = df_for_sort.sort_values(sort_column + '_numeric', ascending=False, na_position='last').index
+            # Reorder the original data with the sorted index
+            df = original_data.loc[sorted_indices].reset_index(drop=True)
+        else:
+            # Text columns
+            ascending = sort_order == "asc"
+            df = df.sort_values(sort_column, ascending=ascending, na_position='last').reset_index(drop=True)
+    # Pinned rows are keyed on the rank number, not on the row index
+    if pinned_rows:
+        # Split the rows by whether their rank was requested
+        pinned_df = df[df['排名'].isin(pinned_rows)].copy()
+        unpinned_df = df[~df['排名'].isin(pinned_rows)].copy()
+        # Pinned rows are ordered by ascending rank
+        if not pinned_df.empty:
+            pinned_df = pinned_df.sort_values('排名', ascending=True)
+        # Unpinned rows keep the current order (not forced back to rank order),
+        # which preserves the sort the user selected
+        # Combine: pinned rows first, the rest after
+        if not pinned_df.empty:
+            display_df = pd.concat([pinned_df, unpinned_df], ignore_index=True)
+        else:
+            display_df = unpinned_df
+    else:
+        # Nothing pinned: keep the current order (not forced back to rank order)
+        display_df = df.reset_index(drop=True)
+    # Add a row id column; it is skipped when headers and cells are emitted below
+    display_df['row_id'] = display_df.index
+    # Build the HTML table
+    html = """
+    <div style="overflow-x: auto; width: 100%;">
+        <style>
+            .sort-btn {
+                background: none;
+                border: none;
+                cursor: pointer;
+                margin-left: 5px;
+                font-size: 12px;
+                color: #007bff;
+                padding: 2px 4px;
+                border-radius: 3px;
+            }
+            .sort-btn:hover {
+                background-color: #e9ecef;
+            }
+            .sort-btn:active {
+                background-color: #dee2e6;
+            }
+            .pin-btn {
+                background: none;
+                border: none;
+                cursor: pointer;
+                font-size: 16px;
+                padding: 2px 4px;
+                margin-right: 8px;
+                border-radius: 3px;
+            }
+            .pin-btn:hover {
+                background-color: #e9ecef;
+            }
+            .pinned-row {
+                background-color: #e3f2fd !important;
+                border-left: 4px solid #2196f3 !important;
+            }
+        </style>
+        <table style="width: 100%; border-collapse: collapse; margin: 20px 0; font-size: 14px;">
+            <thead>
+                <tr style="background-color: #f8f9fa; border-bottom: 2px solid #dee2e6;">
+    """
+    # Header row: pin column followed by the data columns
+    html += '<th style="padding: 12px 8px; text-align: center; border: 1px solid #dee2e6; font-weight: bold; width: 50px;">📌</th>'
+    for col in display_df.columns:
+        if col == 'row_id':  # skip the internal row_id column
+            continue
+        html += f'''
+            <th style="padding: 12px 8px; text-align: left; border: 1px solid #dee2e6; font-weight: bold;">
+                {col}
+            </th>
+        '''
+    html += """
+                </tr>
+            </thead>
+            <tbody>
+    """
+    # Data rows
+    for idx, row in display_df.iterrows():
+        row_rank = row['排名']  # use the rank number, not row_id
+        is_pinned = row_rank in pinned_rows if pinned_rows else False
+        # Style precedence: pinned rows, then the top three ranks, then zebra striping
+        row_style = ""
+        if is_pinned:
+            row_style = "background-color: #e3f2fd; border-left: 4px solid #2196f3;"
+        elif row['排名'] <= 3:
+            row_style = "background-color: #fff3cd;"
+        elif idx % 2 == 0:
+            row_style = "background-color: #f8f9fa;"
+        html += f'<tr style="{row_style}">'
+        # Pin status indicator cell
+        pin_icon = "📌" if is_pinned else "📍"
+        html += f'''
+            <td style="padding: 10px 8px; border: 1px solid #dee2e6; text-align: center;">
+                <span title="排名: {row_rank}">
+                    {pin_icon}
+                </span>
+            </td>
+        '''
+        for col in display_df.columns:
+            if col == 'row_id':  # skip the internal row_id column
+                continue
+            cell_value = row[col]
+            cell_style = "padding: 10px 8px; border: 1px solid #dee2e6; text-align: left;"
+            # Link column: render as an anchor (the value is inserted unescaped)
+            if col == "使用链接" and cell_value != "-" and pd.notna(cell_value):
+                cell_content = f'<a href="{cell_value}" target="_blank" style="color: #007bff; text-decoration: none;">尝试使用</a>'
+            # Average score column: emphasised
+            elif col == "平均分" and cell_value != "-":
+                cell_style += " font-weight: bold; color: #28a745;"
+                cell_content = str(cell_value)
+            else:
+                cell_content = str(cell_value) if pd.notna(cell_value) else "-"
+            html += f'<td style="{cell_style}">{cell_content}</td>'
+        html += '</tr>'
+    html += """
+            </tbody>
+        </table>
+        <script>
+            // 简化的脚本，只保留必要功能
+            console.log('医疗多模态大模型排行榜已加载');
+        </script>
+    </div>
+    """
+    return html
+# 创建Gradio界面
+with gr.Blocks(title="医疗大模型排行榜", theme=gr.themes.Soft(), css="""
+    .responsive-table {
+        width: 100%;
+        overflow-x: auto;
+    }
+    .responsive-table table {
+        width: 100%;
+        min-width: 800px;
+    }
+    /* 确保表格内容不会被截断 */
+    .responsive-table td {
+        white-space: nowrap;
+        overflow: hidden;
+        text-overflow: ellipsis;
+    }
+    /* 平均分列样式 */
+    .responsive-table td:nth-child(3) {
+        font-weight: bold;
+    }
+""") as demo:
+    gr.Markdown("# 🤖 医疗大模型排行榜")
+    gr.Markdown("欢迎来到 RS Medical LLM Leaderboard 排行榜！这里展示了医疗领域大语言模型和医疗多模态模型的性能排名。我们是一个中立的评估机构，旨在将模型性能公平的进行比较。我们将在未来推出医版 Arena 平台。")
+    with gr.Tabs():
+        # Tab: medical LLM leaderboard
+        with gr.TabItem("💬 医疗大语言模型排行榜"):
+            # Filter controls sit at the top
+            with gr.Row():
+                llm_type_filter = gr.Dropdown(
+                    choices=["全部", "开源", "商业"],
+                    value="全部",
+                    label="模型类型",
+                    scale=1
+                )
+                llm_min_score = gr.Slider(
+                    minimum=0,
+                    maximum=80,
+                    value=0,
+                    step=5,
+                    label="最低平均分",
+                    scale=2
+                )
+                llm_refresh_btn = gr.Button("🔄 刷新数据", variant="primary", scale=1)
+            # Sort controls
+            with gr.Row():
+                llm_sort_column = gr.Dropdown(
+                    choices=["排名", "模型名称", "平均分", "MMMU-Med", "VQA-RAD", "SLAKE", "PathVQA", "PMC-VQA", "OMVQA", "MedXQA", "最后更新", "类型"],
+                    value="排名",
+                    label="排序列",
+                    scale=2
+                )
+                llm_sort_order = gr.Radio(
+                    choices=[("升序 ↑", "asc"), ("降序 ↓", "desc")],
+                    value="desc",
+                    label="排序方式",
+                    scale=1
+                )
+                with gr.Column(scale=1):
+                    llm_default_sort_btn = gr.Button("↩️ 默认排序", variant="primary", scale=1)
+            # Pin controls
+            with gr.Row():
+                llm_pin_input = gr.Textbox(
+                    label="置顶行号（用逗号分隔多个行号，如：1,3,5）",
+                    placeholder="输入要置顶的行号",
+                    scale=2
+                )
+                with gr.Column(scale=1):
+                    llm_pin_btn = gr.Button("📌 应用置顶", variant="primary", scale=1)
+                    llm_clear_pin_btn = gr.Button("🗑️ 清除置顶", variant="primary", scale=1)
+            # Pinned ranks kept in a gr.State (no visible widget)
+            llm_pinned_state = gr.State(value=set())
+            # An HTML component is used so the table can contain links
+            llm_leaderboard_html = gr.HTML(
+                value=generate_llm_html_table(pinned_rows=set()),
+                label="医疗大语言模型排行榜"
+            )
+        # Tab: medical multimodal leaderboard
+        # (tab label repaired: it contained mojibake in place of "行")
+        with gr.TabItem("👁️ 医疗多模态大模型排行榜"):
+            # Filter controls sit at the top
+            with gr.Row():
+                multimodal_type_filter = gr.Dropdown(
+                    choices=["全部", "开源", "商业"],
+                    value="全部",
+                    label="模型类型",
+                    scale=1
+                )
+                multimodal_min_score = gr.Slider(
+                    minimum=0,
+                    maximum=80,
+                    value=0,
+                    step=5,
+                    label="最低平均分",
+                    scale=2
+                )
+                multimodal_refresh_btn = gr.Button("🔄 刷新数据", variant="primary", scale=1)
+            # Sort controls
+            with gr.Row():
+                sort_column = gr.Dropdown(
+                    choices=["排名", "模型名称", "平均分", "MMMU-Med", "VQA-RAD", "SLAKE", "PathVQA", "PMC-VQA", "OMVQA", "MedXQA", "最后更新", "类型"],
+                    value="排名",
+                    label="排序列",
+                    scale=2
+                )
+                sort_order = gr.Radio(
+                    choices=[("升序 ↑", "asc"), ("降序 ↓", "desc")],
+                    value="desc",
+                    label="排序方式",
+                    scale=1
+                )
+                with gr.Column(scale=1):
+                    # sort_btn = gr.Button("🔄 应用排序", variant="secondary", scale=1)
+                    default_sort_btn = gr.Button("↩️ 默认排序", variant="primary", scale=1)
+            # Pin controls
+            with gr.Row():
+                pin_input = gr.Textbox(
+                    label="置顶行号（用逗号分隔多个行号，如：1,3,5）",
+                    placeholder="输入要置顶的行号",
+                    scale=2
+                )
+                with gr.Column(scale=1):
+                    pin_btn = gr.Button("📌 应用置顶", variant="primary", scale=1)
+                    clear_pin_btn = gr.Button("🗑️ 清除置顶", variant="primary", scale=1)
+            # Pinned ranks kept in a gr.State (no visible widget)
+            pinned_state = gr.State(value=set())
+            # An HTML component is used so the table can contain links
+            multimodal_leaderboard_html = gr.HTML(
+                value=generate_multimodal_html_table(pinned_rows=set()),
+                label="医疗多模态大模型排行榜"
+            )
+    # Event handlers
+    def update_llm_leaderboard_html(type_filter, min_score, sort_col=None, sort_ord="desc", pinned_rows=None):
+        """Filter the LLM leaderboard and render it, treating a None pinned_rows as an empty set."""
+        pins = set() if pinned_rows is None else pinned_rows
+        return generate_llm_html_table(
+            filter_llm_leaderboard(type_filter, min_score), sort_col, sort_ord, pins
+        )
+    def sort_llm_table(type_filter, min_score, sort_col, sort_ord, pinned_rows):
+        """Filter the LLM leaderboard and render it with the given sort and pinned ranks."""
+        return generate_llm_html_table(
+            filter_llm_leaderboard(type_filter, min_score), sort_col, sort_ord, pinned_rows
+        )
+    def default_sort_llm_table(type_filter, min_score, pinned_rows):
+        """Render the LLM table without an explicit sort and reset the sort widgets.
+        Returns (html_table, "排名", "desc"); the last two values are the
+        sort-column and sort-order selections to restore.
+        """
+        board = filter_llm_leaderboard(type_filter, min_score)
+        rendered = generate_llm_html_table(board, None, "desc", pinned_rows)
+        return rendered, "排名", "desc"
+    def apply_llm_pin(pin_input_text, type_filter, min_score, sort_col, sort_ord, current_pinned):
+        """Parse the pin textbox and render the LLM table with those ranks pinned.
+        The text is a list of rank numbers separated by "," or the full-width
+        "，". Blank (or None) input clears the pins, non-positive numbers are
+        ignored, and input containing a non-integer token leaves
+        current_pinned unchanged.
+        Returns (html_table, pinned_set).
+        """
+        try:
+            # Normalise the full-width comma so Chinese-IME input parses too.
+            tokens = (pin_input_text or "").replace("，", ",").split(",")
+            pin_numbers = [int(token) for token in tokens if token.strip()]
+        except ValueError:
+            # Malformed input: keep whatever was pinned before.
+            new_pinned = current_pinned
+        else:
+            new_pinned = {n for n in pin_numbers if n > 0}
+        filtered_df = filter_llm_leaderboard(type_filter, min_score)
+        html_table = generate_llm_html_table(filtered_df, sort_col, sort_ord, new_pinned)
+        return html_table, new_pinned
+    def clear_llm_pin(type_filter, min_score, sort_col, sort_ord):
+        """Render the LLM table with nothing pinned.
+        Returns (html_table, empty pinned set, "" for the pin textbox).
+        """
+        board = filter_llm_leaderboard(type_filter, min_score)
+        rendered = generate_llm_html_table(board, sort_col, sort_ord, set())
+        return rendered, set(), ""
+    def update_multimodal_leaderboard_html(type_filter, min_score, sort_col=None, sort_ord="desc", pinned_rows=None):
+        """Filter the multimodal leaderboard and render it, treating a None pinned_rows as an empty set."""
+        pins = set() if pinned_rows is None else pinned_rows
+        return generate_multimodal_html_table(
+            filter_multimodal_leaderboard(type_filter, min_score), sort_col, sort_ord, pins
+        )
+    def sort_multimodal_table(type_filter, min_score, sort_col, sort_ord, pinned_rows):
+        """Filter the multimodal leaderboard and render it with the given sort and pinned ranks."""
+        return generate_multimodal_html_table(
+            filter_multimodal_leaderboard(type_filter, min_score), sort_col, sort_ord, pinned_rows
+        )
+    def default_sort_multimodal_table(type_filter, min_score, pinned_rows):
+        """Render the multimodal table without an explicit sort and reset the sort widgets.
+        Returns (html_table, "排名", "desc"); the last two values are the
+        sort-column and sort-order selections to restore.
+        """
+        board = filter_multimodal_leaderboard(type_filter, min_score)
+        rendered = generate_multimodal_html_table(board, None, "desc", pinned_rows)
+        return rendered, "排名", "desc"
+    def apply_pin(pin_input_text, type_filter, min_score, sort_col, sort_ord, current_pinned):
+        """Parse the pin textbox and render the multimodal table with those ranks pinned.
+        The text is a list of rank numbers separated by "," or the full-width
+        "，". Blank (or None) input clears the pins, non-positive numbers are
+        ignored, and input containing a non-integer token leaves
+        current_pinned unchanged.
+        Returns (html_table, pinned_set).
+        """
+        try:
+            # Normalise the full-width comma so Chinese-IME input parses too.
+            tokens = (pin_input_text or "").replace("，", ",").split(",")
+            pin_numbers = [int(token) for token in tokens if token.strip()]
+        except ValueError:
+            # Malformed input: keep whatever was pinned before.
+            new_pinned = current_pinned
+        else:
+            new_pinned = {n for n in pin_numbers if n > 0}
+        filtered_df = filter_multimodal_leaderboard(type_filter, min_score)
+        html_table = generate_multimodal_html_table(filtered_df, sort_col, sort_ord, new_pinned)
+        return html_table, new_pinned
+    def clear_pin(type_filter, min_score, sort_col, sort_ord):
+        """Render the multimodal table with nothing pinned.
+        Returns (html_table, empty pinned set, "" for the pin textbox).
+        """
+        board = filter_multimodal_leaderboard(type_filter, min_score)
+        rendered = generate_multimodal_html_table(board, sort_col, sort_ord, set())
+        return rendered, set(), ""
+    # Event wiring - medical LLM tab. Filter changes and the refresh button all
+    # re-render through sort_llm_table with the current sort and pinned state.
+    llm_type_filter.change(
+        fn=sort_llm_table,
+        inputs=[llm_type_filter, llm_min_score, llm_sort_column, llm_sort_order, llm_pinned_state],
+        outputs=llm_leaderboard_html
+    )
+    llm_min_score.change(
+        fn=sort_llm_table,
+        inputs=[llm_type_filter, llm_min_score, llm_sort_column, llm_sort_order, llm_pinned_state],
+        outputs=llm_leaderboard_html
+    )
+    llm_refresh_btn.click(
+        fn=sort_llm_table,
+        inputs=[llm_type_filter, llm_min_score, llm_sort_column, llm_sort_order, llm_pinned_state],
+        outputs=llm_leaderboard_html
+    )
+    # Sort controls - medical LLM tab
+    llm_sort_column.change(
+        fn=sort_llm_table,
+        inputs=[llm_type_filter, llm_min_score, llm_sort_column, llm_sort_order, llm_pinned_state],
+        outputs=llm_leaderboard_html
+    )
+    llm_sort_order.change(
+        fn=sort_llm_table,
+        inputs=[llm_type_filter, llm_min_score, llm_sort_column, llm_sort_order, llm_pinned_state],
+        outputs=llm_leaderboard_html
+    )
+    # Default-sort button - medical LLM tab (also resets both sort widgets)
+    llm_default_sort_btn.click(
+        fn=default_sort_llm_table,
+        inputs=[llm_type_filter, llm_min_score, llm_pinned_state],
+        outputs=[llm_leaderboard_html, llm_sort_column, llm_sort_order]
+    )
+    # Pin controls - medical LLM tab
+    llm_pin_btn.click(
+        fn=apply_llm_pin,
+        inputs=[llm_pin_input, llm_type_filter, llm_min_score, llm_sort_column, llm_sort_order, llm_pinned_state],
+        outputs=[llm_leaderboard_html, llm_pinned_state]
+    )
+    llm_clear_pin_btn.click(
+        fn=clear_llm_pin,
+        inputs=[llm_type_filter, llm_min_score, llm_sort_column, llm_sort_order],
+        outputs=[llm_leaderboard_html, llm_pinned_state, llm_pin_input]
+    )
+    # Event wiring - medical multimodal tab. Filter changes and the refresh button
+    # all re-render through sort_multimodal_table with the current sort and pinned state.
+    multimodal_type_filter.change(
+        fn=sort_multimodal_table,
+        inputs=[multimodal_type_filter, multimodal_min_score, sort_column, sort_order, pinned_state],
+        outputs=multimodal_leaderboard_html
+    )
+    multimodal_min_score.change(
+        fn=sort_multimodal_table,
+        inputs=[multimodal_type_filter, multimodal_min_score, sort_column, sort_order, pinned_state],
+        outputs=multimodal_leaderboard_html
+    )
+    multimodal_refresh_btn.click(
+        fn=sort_multimodal_table,
+        inputs=[multimodal_type_filter, multimodal_min_score, sort_column, sort_order, pinned_state],
+        outputs=multimodal_leaderboard_html
+    )
+    # Sort controls (the explicit "apply sort" button below is disabled)
+    # sort_btn.click(
+    #     fn=sort_multimodal_table,
+    #     inputs=[multimodal_type_filter, multimodal_min_score, sort_column, sort_order, pinned_state],
+    #     outputs=multimodal_leaderboard_html
+    # )
+    sort_column.change(
+        fn=sort_multimodal_table,
+        inputs=[multimodal_type_filter, multimodal_min_score, sort_column, sort_order, pinned_state],
+        outputs=multimodal_leaderboard_html
+    )
+    sort_order.change(
+        fn=sort_multimodal_table,
+        inputs=[multimodal_type_filter, multimodal_min_score, sort_column, sort_order, pinned_state],
+        outputs=multimodal_leaderboard_html
+    )
+    # Default-sort button (also resets both sort widgets)
+    default_sort_btn.click(
+        fn=default_sort_multimodal_table,
+        inputs=[multimodal_type_filter, multimodal_min_score, pinned_state],
+        outputs=[multimodal_leaderboard_html, sort_column, sort_order]
+    )
+    # Pin controls
+    pin_btn.click(
+        fn=apply_pin,
+        inputs=[pin_input, multimodal_type_filter, multimodal_min_score, sort_column, sort_order, pinned_state],
+        outputs=[multimodal_leaderboard_html, pinned_state]
+    )
+    clear_pin_btn.click(
+        fn=clear_pin,
+        inputs=[multimodal_type_filter, multimodal_min_score, sort_column, sort_order],
+        outputs=[multimodal_leaderboard_html, pinned_state, pin_input]
+    )
+# Launch the Gradio app only when this file is run as a script.
+if __name__ == "__main__":
+    demo.launch(
+        # server_name="0.0.0.0",
+        # server_port=7863,
+        share=False,
+        show_error=True
+    )

metadata/medical_data.json ADDED Viewed

	@@ -0,0 +1,16 @@

+[
+    {
+      "模型名称": "GPT-4.1",
+      "MMMU-Med": 89.6,
+      "VQA-RAD": 75.6,
+      "SLAKE": 77.7,
+      "PathVQA": 89.1,
+      "PMC-VQA": 77.0,
+      "OMVQA": 30.9,
+      "MedXQA": 49.9,
+      "平均分": 70.0,
+      "最后更新": "2025-07-14",
+      "类型": "商业",
+      "使用链接": "https://www.google.com/ "
+    }
+]

metadata/medical_mm_data.json ADDED Viewed

	@@ -0,0 +1,310 @@

+[
+    {
+      "模型名称": "GPT-4.1",
+      "MMMU-Med": 75.2,
+      "VQA-RAD": 65.0,
+      "SLAKE": 72.2,
+      "PathVQA": 55.5,
+      "PMC-VQA": 55.2,
+      "OMVQA": 75.5,
+      "MedXQA": 45.2,
+      "平均分": 63.4,
+      "最后更新": "2025-07-14",
+      "类型": "商业",
+      "使用链接": "https://www.google.com/ "
+    },
+    {
+      "模型名称": "Claude Sonnet 4",
+      "MMMU-Med": 74.6,
+      "VQA-RAD": 67.6,
+      "SLAKE": 70.6,
+      "PathVQA": 54.2,
+      "PMC-VQA": 54.4,
+      "OMVQA": 65.5,
+      "MedXQA": 43.3,
+      "平均分": 61.5,
+      "最后更新": "2025-07-14",
+      "类型": "商业",
+      "使用链接": "https://www.google.com/ "
+    },
+    {
+      "模型名称": "Gemini-2.5-Flash",
+      "MMMU-Med": 76.9,
+      "VQA-RAD": 68.5,
+      "SLAKE": 75.8,
+      "PathVQA": 55.4,
+      "PMC-VQA": 55.4,
+      "OMVQA": 71.0,
+      "MedXQA": 52.8,
+      "平均分": 65.1,
+      "最后更新": "2025-07-14",
+      "类型": "商业",
+      "使用链接": "https://www.google.com/ "
+    },
+    {
+      "模型名称": "BiomedGPT♡",
+      "MMMU-Med": 24.9,
+      "VQA-RAD": 16.6,
+      "SLAKE": 13.6,
+      "PathVQA": 11.3,
+      "PMC-VQA": 27.6,
+      "OMVQA": 27.9,
+      "MedXQA": null,
+      "平均分": null,
+      "最后更新": "2025-07-14",
+      "类型": "开源",
+      "使用链接": "https://www.google.com/ "
+    },
+    {
+      "模型名称": "Med-R1-2B◇",
+      "MMMU-Med": 34.8,
+      "VQA-RAD": 39.6,
+      "SLAKE": 54.5,
+      "PathVQA": 15.3,
+      "PMC-VQA": 47.4,
+      "OMVQA": null,
+      "MedXQA": 21.1,
+      "平均分": null,
+      "最后更新": "2025-07-14",
+      "类型": "开源",
+      "使用链接": "https://www.google.com/ "
+    },
+    {
+      "模型名称": "MedVLM-R1-2B",
+      "MMMU-Med": 42.6,
+      "VQA-RAD": 48.6,
+      "SLAKE": 56.0,
+      "PathVQA": 32.5,
+      "PMC-VQA": 47.6,
+      "OMVQA": 77.7,
+      "MedXQA": 20.4,
+      "平均分": 45.4,
+      "最后更新": "2025-07-14",
+      "类型": "开源",
+      "使用链接": "https://www.google.com/ "
+    },
+    {
+      "模型名称": "MedGemma-4B-IT",
+      "MMMU-Med": 43.7,
+      "VQA-RAD": 72.5,
+      "SLAKE": 76.4,
+      "PathVQA": 48.8,
+      "PMC-VQA": 49.9,
+      "OMVQA": 69.8,
+      "MedXQA": 22.3,
+      "平均分": 54.8,
+      "最后更新": "2025-07-14",
+      "类型": "开源",
+      "使用链接": "https://www.google.com/ "
+    },
+    {
+      "模型名称": "LLaVA-Med-7B",
+      "MMMU-Med": 29.3,
+      "VQA-RAD": 37.7,
+      "SLAKE": 48.0,
+      "PathVQA": 38.8,
+      "PMC-VQA": 30.5,
+      "OMVQA": 44.3,
+      "MedXQA": 20.3,
+      "平均分": 37.8,
+      "最后更新": "2025-07-14",
+      "类型": "开源",
+      "使用链接": "https://www.google.com/ "
+    },
+    {
+      "模型名称": "HuatuoGPT-V-7B",
+      "MMMU-Med": 47.3,
+      "VQA-RAD": 30.0,
+      "SLAKE": 67.8,
+      "PathVQA": 48.0,
+      "PMC-VQA": 53.3,
+      "OMVQA": 74.2,
+      "MedXQA": 21.6,
+      "平均分": 54.2,
+      "最后更新": "2025-07-14",
+      "类型": "开源",
+      "使用链接": "https://www.google.com/ "
+    },
+    {
+      "模型名称": "BioMediX2-8B",
+      "MMMU-Med": 39.8,
+      "VQA-RAD": 44.9,
+      "SLAKE": 57.7,
+      "PathVQA": 37.0,
+      "PMC-VQA": 43.5,
+      "OMVQA": 63.3,
+      "MedXQA": 21.8,
+      "平均分": 44.6,
+      "最后更新": "2025-07-14",
+      "类型": "开源",
+      "使用链接": "https://www.google.com/ "
+    },
+    {
+      "模型名称": "Qwen2.5VL-7B",
+      "MMMU-Med": 50.0,
+      "VQA-RAD": 50.6,
+      "SLAKE": 47.4,
+      "PathVQA": 44.1,
+      "PMC-VQA": 51.9,
+      "OMVQA": 63.6,
+      "MedXQA": 22.3,
+      "平均分": 50.0,
+      "最后更新": "2025-07-14",
+      "类型": "开源",
+      "使用链接": "https://www.google.com/ "
+    },
+    {
+      "模型名称": "InternVL2.5-8B",
+      "MMMU-Med": 53.5,
+      "VQA-RAD": 59.4,
+      "SLAKE": 69.0,
+      "PathVQA": 42.1,
+      "PMC-VQA": 51.3,
+      "OMVQA": 81.3,
+      "MedXQA": 21.7,
+      "平均分": 54.0,
+      "最后更新": "2025-07-14",
+      "类型": "开源",
+      "使用链接": "https://www.google.com/ "
+    },
+    {
+      "模型名称": "InternVL3-8B",
+      "MMMU-Med": 59.2,
+      "VQA-RAD": 61.4,
+      "SLAKE": 72.8,
+      "PathVQA": 48.6,
+      "PMC-VQA": 53.8,
+      "OMVQA": 79.1,
+      "MedXQA": 22.4,
+      "平均分": 57.3,
+      "最后更新": "2025-07-14",
+      "类型": "开源",
+      "使用链接": "https://www.google.com/ "
+    },
+    {
+      "模型名称": "Lingshu-7B",
+      "MMMU-Med": 54.0,
+      "VQA-RAD": 67.9,
+      "SLAKE": 83.1,
+      "PathVQA": 61.9,
+      "PMC-VQA": 56.3,
+      "OMVQA": 82.9,
+      "MedXQA": 26.7,
+      "平均分": 61.8,
+      "最后更新": "2025-07-14",
+      "类型": "开源",
+      "使用链接": "https://www.google.com/ "
+    },
+    {
+      "模型名称": "HealthGPT-14B",
+      "MMMU-Med": 49.6,
+      "VQA-RAD": 65.0,
+      "SLAKE": 66.1,
+      "PathVQA": 56.7,
+      "PMC-VQA": 56.4,
+      "OMVQA": 75.2,
+      "MedXQA": 24.7,
+      "平均分": 56.2,
+      "最后更新": "2025-07-14",
+      "类型": "开源",
+      "使用链接": "https://www.google.com/ "
+    },
+    {
+      "模型名称": "HuatuoGPT-V-34B",
+      "MMMU-Med": 51.8,
+      "VQA-RAD": 61.4,
+      "SLAKE": 69.5,
+      "PathVQA": 44.4,
+      "PMC-VQA": 56.6,
+      "OMVQA": 74.0,
+      "MedXQA": 22.1,
+      "平均分": 54.3,
+      "最后更新": "2025-07-14",
+      "类型": "开源",
+      "使用链接": "https://www.google.com/ "
+    },
+    {
+      "模型名称": "MedDr-40B♡",
+      "MMMU-Med": 25.2,
+      "VQA-RAD": 42.0,
+      "SLAKE": 53.5,
+      "PathVQA": 13.9,
+      "PMC-VQA": 64.3,
+      "OMVQA": null,
+      "MedXQA": null,
+      "平均分": null,
+      "最后更新": "2025-07-14",
+      "类型": "开源",
+      "使用链接": "https://www.google.com/ "
+    },
+    {
+      "模型名称": "InternVL3-14B",
+      "MMMU-Med": 63.1,
+      "VQA-RAD": 66.1,
+      "SLAKE": 72.8,
+      "PathVQA": 48.0,
+      "PMC-VQA": 54.1,
+      "OMVQA": 78.9,
+      "MedXQA": 23.1,
+      "平均分": 58.0,
+      "最后更新": "2025-07-14",
+      "类型": "开源",
+      "使用链接": "https://www.google.com/ "
+    },
+    {
+      "模型名称": "Qwen2.5VL-32B",
+      "MMMU-Med": 59.6,
+      "VQA-RAD": 71.2,
+      "SLAKE": 71.2,
+      "PathVQA": 41.9,
+      "PMC-VQA": 54.5,
+      "OMVQA": 68.8,
+      "MedXQA": 25.2,
+      "平均分": 56.1,
+      "最后更新": "2025-07-14",
+      "类型": "开源",
+      "使用链接": "https://www.google.com/ "
+    },
+    {
+      "模型名称": "InternVL2.5-38B",
+      "MMMU-Med": 64.5,
+      "VQA-RAD": 70.6,
+      "SLAKE": 75.2,
+      "PathVQA": 57.2,
+      "PMC-VQA": 79.9,
+      "OMVQA": 24.4,
+      "MedXQA": 41.1,
+      "平均分": 59.0,
+      "最后更新": "2025-07-14",
+      "类型": "开源",
+      "使用链接": "https://www.google.com/ "
+    },
+    {
+      "模型名称": "InternVL3-38B",
+      "MMMU-Med": 65.2,
+      "VQA-RAD": 72.7,
+      "SLAKE": 71.0,
+      "PathVQA": 51.0,
+      "PMC-VQA": 56.6,
+      "OMVQA": 79.8,
+      "MedXQA": 25.2,
+      "平均分": 59.4,
+      "最后更新": "2025-07-14",
+      "类型": "开源",
+      "使用链接": "https://www.google.com/ "
+    },
+    {
+      "模型名称": "Lingshu-32B",
+      "MMMU-Med": 62.3,
+      "VQA-RAD": 76.5,
+      "SLAKE": 89.2,
+      "PathVQA": 65.9,
+      "PMC-VQA": 57.9,
+      "OMVQA": 83.4,
+      "MedXQA": 30.9,
+      "平均分": 66.6,
+      "最后更新": "2025-07-14",
+      "类型": "开源",
+      "使用链接": "https://www.google.com/ "
+    }
+  ]

requirements.txt ADDED Viewed

	@@ -0,0 +1,3 @@

+gradio==5.9.1
+pandas>=1.5.0
+numpy>=1.24.0