# File size: 12,895 Bytes

# e490e7e

import os
import os.path as osp
from glob import glob
import math
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import plotly.graph_objects as go
from tensorboard.backend.event_processing import event_accumulator

# Global matplotlib appearance shared by every figure this script produces.
# Alternative style sheets are kept commented out for quick switching.
# plt.style.use("seaborn-v0_8")   # seaborn style
# plt.style.use("ggplot")         # ggplot style
plt.style.use("classic")      # classic style
# plt.style.use("bmh")            # "Bayesian Methods for Hackers" style
# Font family is assigned after the style sheet has been applied.
plt.rcParams["font.family"] = "Times New Roman"


def smooth(values, weight=0.6):
    """Smooth a sequence with an exponential moving average (EMA).

    The running average is seeded with the first element, so the first
    output equals the first input; afterwards
    ``out[i] = weight * out[i - 1] + (1 - weight) * values[i]``.

    Args:
        values: Iterable of numbers (list, tuple, NumPy array, generator, ...).
        weight: Smoothing factor, intended to lie in [0, 1]; the larger it
            is, the smoother the resulting curve.

    Returns:
        ``np.ndarray`` holding one smoothed value per input element. An empty
        input yields an empty array instead of raising ``IndexError``.
    """
    # Materialise once: lets generators through and makes the emptiness check
    # safe for NumPy arrays (whose truth value is ambiguous).
    values = list(values)
    if not values:
        return np.array([])

    smoothed = []
    last = values[0]
    for v in values:
        last = last * weight + (1 - weight) * v
        smoothed.append(last)
    return np.array(smoothed)


def read_event_scalar(event_file, tag):
    """
    Read the scalar series stored under ``tag`` in a TensorBoard event file.

    Returns a ``(steps, values)`` pair of equally long lists built from the
    scalar events the accumulator reports for ``tag``. Raises ``ValueError``
    (listing the available scalar tags) when ``tag`` is not present.
    """
    ea = event_accumulator.EventAccumulator(event_file)
    ea.Reload()
    if tag in ea.Tags()["scalars"]:
        steps, values = [], []
        for record in ea.Scalars(tag):
            steps.append(record.step)
            values.append(record.value)
        return steps, values
    raise ValueError(f"Tag {tag} not found in {event_file}, available: {ea.Tags()['scalars']}")


def plot_multiple_events(event_files, tags, save_path, fontsize=18,
                         smooth_weight=0.6, prompts_per_step=40, tick_stride=10):
    """Plot the same scalar tags from several event files, one subplot per tag.

    Every event file contributes one EMA-smoothed curve to each subplot. For
    each tag the largest *unsmoothed* value and the run it belongs to are
    printed to stdout.

    Args:
        event_files: Paths of TensorBoard event files. The legend label of a
            file is the name of the directory two levels above it.
        tags: Scalar tag names; subplots are stacked vertically in this order.
        save_path: Output image path. Missing parent directories are created.
        fontsize: Title font size; axis text uses ``fontsize - 2`` and the
            legend ``fontsize - 4``.
        smooth_weight: EMA weight forwarded to ``smooth``.
        prompts_per_step: Multiplier turning a logged step into the
            "Seen Prompts" count shown on the x axis.
        tick_stride: Keep every ``tick_stride``-th logged step as an x tick.

    Raises:
        ValueError: If ``event_files`` is empty.
    """
    # Materialise so the files can be iterated once per tag.
    event_files = list(event_files)
    if not event_files:
        raise ValueError("event_files is empty; there is nothing to plot")

    out_dir = osp.dirname(save_path)
    if out_dir:
        os.makedirs(out_dir, exist_ok=True)

    fig = plt.figure(figsize=(6, 8))
    try:
        for i, tag in enumerate(tags):
            plt.subplot(len(tags), 1, i + 1)
            # Start from -inf so curves that stay below -1 are still reported.
            max_val, max_cfg = -np.inf, None
            tick_steps = []
            for event_file in event_files:
                label = osp.basename(osp.dirname(osp.dirname(event_file)))
                steps, values = read_event_scalar(event_file, tag)
                run_max = np.max(values)
                if max_val < run_max:
                    max_val, max_cfg = run_max, label
                # Take ticks from the longest series (later file wins ties)
                # so they span the whole x range even if runs differ in length.
                if len(steps) >= len(tick_steps):
                    tick_steps = steps
                plt.plot(steps, smooth(values, weight=smooth_weight), label=label)

            plt.title(tag, fontsize=fontsize)
            plt.xlabel("Seen Prompts", fontsize=fontsize-2)
            shown = tick_steps[::tick_stride]
            plt.xticks(shown, [f'{step * prompts_per_step / 1000:.1f}k' for step in shown], fontsize=fontsize-2)
            plt.legend(loc='lower right', fontsize=fontsize-4)
            plt.grid(True)
            print(f"Max {tag}: {max_val:.4f} in {max_cfg}")

        # Lay out once, after every subplot exists.
        plt.tight_layout()
        fig.savefig(save_path, dpi=400)
    finally:
        # Release the figure so repeated calls do not accumulate open figures.
        plt.close(fig)


def plot_dpo_implicit_acc_beta():
    """Plot implicit-accuracy curves for the DPO beta ablation.

    Collects the TensorBoard event files matching
    ``exps/audio_video/ablation/dpo/beta/<run>/tensorboard/events.out.tfevents.*``,
    orders them by the integer after the last ``_`` in ``<run>``, drops the
    runs whose value is 300 or 700, and writes
    ``./debug/plot/implicit_acc_beta.pdf``.
    """
    excluded_betas = {300, 700}

    def beta_of(path):
        # The run directory sits two levels above the event file and its
        # name ends in "_<int>".
        return int(osp.basename(osp.dirname(osp.dirname(path))).split('_')[-1])

    event_files = glob('exps/audio_video/ablation/dpo/beta/*/tensorboard/events.out.tfevents.*')
    # Filter on the parsed value instead of looking for "300/" in the path:
    # the substring test would also drop e.g. "..._1300/" and only works with
    # "/" as the path separator.
    event_files = sorted(
        (path for path in event_files if beta_of(path) not in excluded_betas),
        key=beta_of,
    )
    tags = ["implicit_acc_audio", "implicit_acc_video"]
    plot_multiple_events(event_files, tags, save_path='./debug/plot/implicit_acc_beta.pdf')


def plot_dpo_implicit_acc_lr():
    """Plot implicit-accuracy curves for the DPO learning-rate ablation.

    Uses one event file for each of the runs ``lr_1e-5``, ``lr_5e-6`` and
    ``lr_1e-6`` under ``exps/audio_video/ablation/dpo/lr/`` and writes
    ``./debug/plot/implicit_acc_lr.pdf``.

    Raises:
        FileNotFoundError: If a run has no matching event file.
    """
    event_files = []
    for lr in ['1e-5', '5e-6', '1e-6']:
        pattern = f'exps/audio_video/ablation/dpo/lr/lr_{lr}/tensorboard/events.out.tfevents.*'
        # glob returns matches in arbitrary order; sort so the chosen file is
        # the same on every run when several event files exist.
        matches = sorted(glob(pattern))
        if not matches:
            raise FileNotFoundError(f'No TensorBoard event file matches {pattern}')
        event_files.append(matches[0])
    tags = ["implicit_acc_audio", "implicit_acc_video"]
    plot_multiple_events(event_files, tags, save_path='./debug/plot/implicit_acc_lr.pdf')


def plot_perform_radar():
    """Draw a radar chart comparing four models on six metrics and save it as a PDF.

    Each metric has its own raw-score interval (``ranges``). Scores are mapped
    linearly onto [0, 1] per axis so they share one radial scale, while the
    original values are printed along every spoke as tick text. The figure is
    written to ``./debug/plot/perform_radar.pdf``; the directory is created
    when missing and the figure is closed afterwards.
    """
    save_path = './debug/plot/perform_radar.pdf'

    # Metric axes and the raw-score interval each one spans.
    labels = ["V-Quality", "A-Quality", "TV-Align", "TA-Align", "AV-Align", "AV-Sync"]
    ranges = [(0, 3.0),  (3.8, 5.2),  (2, 3.19), (1, 1.98),   (0, 2.8),   (0.4, 1.35)]

    # Example scores, one value per entry of ``labels``.
    scores = {
        "JavisDiT":   [1.02, 4.28, 2.6,  1.3,  1.8, 0.75],
        "UniVerse-1": [1.52, 4.09, 2.9,  1.1,  1.0, 0.80],
        "Ours":       [2.47, 4.91, 3.0, 1.66, 1.92, 1.02],
        "Veo-3":      [2.84, 5.11, 3.1,  1.9,  2.6, 1.28],
    }

    def normalize(values):
        # Map every raw score onto [0, 1] using the interval of its own axis.
        return [(v - lo) / (hi - lo) for v, (lo, hi) in zip(values, ranges)]

    # One spoke per metric; the first angle is repeated to close the polygon.
    N = len(labels)
    angles = np.linspace(0, 2 * np.pi, N, endpoint=False).tolist()
    angles += angles[:1]

    out_dir = osp.dirname(save_path)
    if out_dir:
        os.makedirs(out_dir, exist_ok=True)

    fig, ax = plt.subplots(figsize=(6, 6), subplot_kw=dict(polar=True))
    try:
        fontsize = 16

        ax.set_theta_offset(np.pi/2)     # rotate the zero angle by 90 degrees
        ax.set_theta_direction(1)        # 1 = counter-clockwise, -1 = clockwise

        # One closed, lightly filled polygon per model.
        for name, values in scores.items():
            norm_values = normalize(values)
            norm_values += norm_values[:1]
            ax.plot(angles, norm_values, label=name, linewidth=1.5 if name in ['Ours', 'Veo-3'] else 1)
            ax.fill(angles, norm_values, alpha=0.1)

        # Keep the concentric rings but hide the shared radial tick labels,
        # since every spoke has its own scale.
        ax.set_ylim(0, 1)
        ax.set_yticks([0.25,0.5,0.75])
        ax.set_yticklabels([])

        # Write the raw-scale tick values along each spoke by hand.
        for i, angle in enumerate(angles[:-1]):
            rmin, rmax = ranges[i]
            ticks = np.linspace(rmin, rmax, 5)[1:-1]  # the 3 interior ticks
            for t in ticks:
                rt = (t - rmin) / (rmax - rmin)  # position on the shared [0, 1] scale
                ax.text(angle, rt+0.03, f"{t:.1f}", ha="center", va="center", fontsize=fontsize-2, color="gray")

        # Metric names around the circle.
        ax.set_xticks(angles[:-1])
        labels_text = ax.set_xticklabels(labels, fontsize=fontsize, fontweight="bold")

        # Give the metric names a radial offset so they do not sit on the
        # outer ring. The -0.12 value was tuned by eye; labels 0 and 3 keep
        # their default position.
        for i, (label, angle) in enumerate(zip(labels_text, angles[:-1])):
            if i not in [0, 3]:
                label.set_position((angle, -0.12))
            label.set_ha('center')
            label.set_va('center')

        ax.legend(loc="upper right", bbox_to_anchor=(1.17, 1.15), fontsize=fontsize-2)
        plt.tight_layout()
        fig.savefig(save_path)
    finally:
        # Release the figure so repeated calls do not accumulate open figures.
        plt.close(fig)


def plot_ablation_bar():
    """Draw the grouped bar chart of the LoRA-configuration ablation and save it.

    ``data`` holds one row per metric in ``tasks`` (7 rows) and one column per
    configuration in ``models`` (5 columns). Because the metrics live on very
    different scales, each row is mapped onto [0, 1] with a fixed display
    interval from ``data_range`` before plotting. The figure is written to
    ``./debug/plot/lora_cfg_bar.pdf``; the directory is created when missing
    and the figure is closed afterwards.
    """
    save_path = './debug/plot/lora_cfg_bar.pdf'

    # Seven metrics, each with its own value range.
    tasks = ["FVD ↓", "FAD ↓" , "TV-IB ↑", "TA-IB ↑", "AV-IB ↑", "JavisScore ↑", "DeSync ↓"]
    models = [
        "A-LoRA + AV-LoRA (r=64)", 
        "A-noLoRA + AV-AttnLoRA (r=64)", 
        "A-noLoRA + AV-LoRA (r=64)", 
        "A-noLoRA + AV-LoRA (r=32)", 
        "A-noLoRA + AV-LoRA (r=128)"
    ]

    # Raw scores: rows follow ``tasks``, columns follow ``models``.
    data = np.array([
        [311.6, 223.1, 221.3, 222.5, 218.6],
        [ 5.80,  5.66,  5.51,  5.54,  5.60],
        [ 16.2,  28.1,  28.3,  28.3,  28.2],
        [ 14.2,  14.7,  15.3,  15.2,  14.7],
        [ 12.6,  18.6,  19.4,  19.2,  18.0],
        [  9.1,  14.1,  15.1,  14.7,  14.3],
        [ 96.9,  95.8,  90.1,  90.0,  90.1],
    ])
    # Display interval (lower, upper) used to normalise each row.
    data_range = np.array([
        [200, 350], 
        [5, 6], 
        [10, 35], 
        [10, 18], 
        [10, 25], 
        [5, 20], 
        [85, 100],
    ])

    # ---- Step 1: normalise every metric to [0, 1] on its own interval ----
    # Alternative (per-row min/max scaling), kept for reference:
    # norm_data = (data - data.min(axis=1, keepdims=True)) / \
    #             (data.max(axis=1, keepdims=True) - data.min(axis=1, keepdims=True))
    norm_data = (data - data_range[:, :1]) / (data_range[:, 1:] - data_range[:, :1])

    # ---- Step 2: draw ----
    x = np.arange(len(tasks))  # centre position of each metric group
    bar_width = 0.15
    fontsize = 18

    colors = ["grey", "lightgrey", "royalblue", "cornflowerblue", "lightsteelblue"]
    hatches = [None, None, "//", None, None]  # hatch marks the highlighted configuration

    out_dir = osp.dirname(save_path)
    if out_dir:
        os.makedirs(out_dir, exist_ok=True)

    fig, ax = plt.subplots(figsize=(11.2,4))
    try:
        # Faint horizontal grid drawn underneath the bars.
        ax.set_axisbelow(True)
        ax.yaxis.grid(True, linestyle='--', linewidth=0.7, alpha=0.3, color='gray', zorder=0)

        for i, model in enumerate(models):
            bars = ax.bar(
                x + i*bar_width - (len(models)-1)/2*bar_width,  # centre the group on x
                norm_data[:, i],
                width=bar_width,
                label=model,
                color=colors[i],
                hatch=hatches[i],
                edgecolor="white",
                linewidth=0.5
            )
            # # Optionally annotate every bar with its raw value:
            # for j, bar in enumerate(bars):
            #     ax.text(
            #         bar.get_x() + bar.get_width()/2,
            #         bar.get_height() + 0.02,   # slightly above the bar
            #         f"{data[j, i]:.1f}",      # raw value
            #         ha="center", va="bottom", fontsize=fontsize-4
            #     )

        # ---- Step 3: styling ----
        ax.set_xticks(x)
        ax.set_xlim(x[0]-0.5, x[-1]+0.5)
        ax.set_xticklabels(tasks, fontsize=fontsize)
        ax.set_ylim(0, 1)   # data is normalised
        ax.set_yticklabels([])  # hide y tick labels; heights are only relative

        # Legend above the axes.
        ax.legend(
            loc="upper center", bbox_to_anchor=(0.5, 1.3),
            ncol=int(math.ceil(len(models)/2)), frameon=False, fontsize=fontsize-3
        )

        plt.tight_layout()
        plt.subplots_adjust(left=0.02, right=0.98, top=0.8, bottom=0.1)  # final margins
        fig.savefig(save_path)
    finally:
        # Release the figure so repeated calls do not accumulate open figures.
        plt.close(fig)


def plot_data_filtering():
    """Draw the data-filtering pipeline as a Sankey diagram and save it as a PDF.

    Nodes are placed manually in four columns through ``node_x``/``node_y``.
    The link values are rough estimates (see the note next to them). The
    result is exported with ``fig.write_image`` to
    ``./debug/plot/data_filtering.pdf``, which needs plotly's static-image
    export backend to be installed; the output directory is created when
    missing.
    """
    save_path = './debug/plot/data_filtering.pdf'

    # Node order matters: links and coordinates below refer to these indices.
    labels = [
        # column 1
        "Raw Videos",                                   # 0
        # column 2
        "Clean Videos",                                 # 1
        "Speech Videos",                                # 2
        # column 3
        "HQ Videos",                                    # 3
        "Scoring Filters",                              # 4
        # column 4
        "SFT",                                          # 5
        "DPO",                                          # 6
    ]

    # Node colours, in the same order as ``labels``.
    colors = [
        "purple", "blue", "grey", "darkgreen", "grey", "gold", "brown"
    ]

    # Manual node coordinates: x selects the column, y the row; all in [0, 1].
    node_x = [
        0.01,       # Raw Videos
        0.33, 0.33, # Clean Videos, Speech Videos
        0.66, 0.66, # HQ Videos, Scoring Filters
        0.99, 0.99, # SFT, DPO
    ]
    node_y = [
        0.3,        # Raw Videos
        0.2, 0.7,   # Clean Videos, Speech Videos
        0.1, 0.45,   # HQ Videos, Scoring Filters
        0.1, 0.3,   # SFT, DPO
    ]

    # Flows between nodes. The values were estimated from the link widths of a
    # reference figure and can be replaced with real counts.
    link_source = [0,  0,  1,  1,  3,  3]
    link_target = [1,  2,  3,  4,  5,  6]
    link_value = [66, 34, 33, 33, 29, 4]
    # One translucent grey per link; sized from the link list so adding or
    # removing a link cannot leave the colour list out of sync.
    link_color = ['rgba(128, 128, 128, 0.3)'] * len(link_source)

    fig = go.Figure(data=[go.Sankey(
        node=dict(
            pad=25,
            thickness=30,
            line=dict(color="black", width=0.5),
            label=labels,
            color=colors,
            # explicit coordinates
            x=node_x,
            y=node_y
        ),
        link=dict(
            source=link_source,
            target=link_target,
            value=link_value,
            color=link_color
        )
    )])

    fig.update_layout(
        height=600,
        # Single place for the text style (size previously also set via font_size).
        font=dict(
            family="Times New Roman",
            size=16,
            color="black"
        )
    )

    out_dir = osp.dirname(save_path)
    if out_dir:
        os.makedirs(out_dir, exist_ok=True)

    # fig.show()
    fig.write_image(save_path)


def stat_audio_data_dist(
    data_root='/mnt/HithinkOmniSSD/user_workspace/liukai4/datasets/JavisDiT/train/audio',
    csv_name='JavisDiT_train_audio_v1.csv',
):
    """Print how many CSV rows fall into each sub-directory of ``data_root``.

    Every immediate sub-directory of ``data_root`` is treated as a subset. A
    row of the CSV counts towards a subset when its ``audio_path`` contains
    the subset name as a literal substring. Subsets are printed in descending
    order of count as ``<name> <count> <share>``.

    Args:
        data_root: Directory holding the subset folders and the CSV file.
        csv_name: CSV file name inside ``data_root``; it must provide an
            ``audio_path`` column.
    """
    df = pd.read_csv(osp.join(data_root, csv_name))
    # Sorted so that ties in the ranking below do not depend on listdir order.
    subsets = sorted(
        name for name in os.listdir(data_root) if osp.isdir(osp.join(data_root, name))
    )
    # regex=False: directory names are plain text and may contain characters
    # such as "." or "+" that a regex would interpret. na=False ignores
    # missing paths.
    stat = np.array(
        [df['audio_path'].str.contains(subset, regex=False, na=False).sum() for subset in subsets],
        dtype=int,
    )
    indices = np.argsort(-stat, kind='stable')

    subsets = [subsets[i] for i in indices]
    stat = stat[indices]
    total = stat.sum()
    # Avoid a division by zero when no row matches any subset.
    rel_stat = stat / total if total else np.zeros(len(stat))
    for subset, count, ratio in zip(subsets, stat, rel_stat):
        print(f'{subset:<10} {count:>5} {ratio:.2%}')
    

if __name__ == "__main__":
    # Script entry point: enable the figures/statistics to regenerate and
    # leave the others commented out.

    # plot_dpo_implicit_acc_beta()
    # plot_dpo_implicit_acc_lr()
    # plot_perform_radar()
    plot_ablation_bar()
    # plot_data_filtering()
    # stat_audio_data_dist()