"""Plotting and statistics helpers for the audio-video generation experiments.

Each ``plot_*`` function renders one figure and writes it below
``./debug/plot``; ``stat_audio_data_dist`` prints per-subset sample counts.
Select the function to run in the ``__main__`` guard at the bottom.
"""
import math
import os
import os.path as osp
from glob import glob

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import plotly.graph_objects as go
from tensorboard.backend.event_processing import event_accumulator

# plt.style.use("seaborn-v0_8")  # seaborn style
# plt.style.use("ggplot")  # ggplot style
plt.style.use("classic")  # classic style
# plt.style.use("bmh")  # "Bayesian Methods for Hackers" style
plt.rcParams["font.family"] = "Times New Roman"


def _ensure_parent_dir(path):
    """Create the directory that will contain *path* if it does not exist."""
    parent = osp.dirname(path)
    if parent:
        os.makedirs(parent, exist_ok=True)


def _run_name(event_file):
    """Return the name of the directory two levels above *event_file*.

    For ``<run>/tensorboard/events.out.tfevents.*`` this is ``<run>``.
    """
    return osp.basename(osp.dirname(osp.dirname(event_file)))


def smooth(values, weight=0.6):
    """Smooth a sequence with an exponential moving average (EMA).

    Args:
        values: Sequence of numbers to smooth.
        weight: Smoothing factor, expected in [0, 1]; larger means smoother.

    Returns:
        ``np.ndarray`` with the same length as ``values``. The running
        average is seeded with ``values[0]``, so the first output equals the
        first input. An empty input yields an empty array.
    """
    if len(values) == 0:
        # Nothing to seed the running average with.
        return np.array([])

    smoothed = []
    last = values[0]
    for value in values:
        last = last * weight + (1 - weight) * value
        smoothed.append(last)
    return np.array(smoothed)


def read_event_scalar(event_file, tag):
    """Read the scalar series stored under *tag* in a TensorBoard event file.

    Args:
        event_file: Path handed to ``EventAccumulator``.
        tag: Name of the scalar to extract.

    Returns:
        Tuple ``(steps, values)`` of two equally long lists.

    Raises:
        ValueError: If *tag* is not among the scalar tags of the file.
    """
    ea = event_accumulator.EventAccumulator(event_file)
    ea.Reload()

    scalar_tags = ea.Tags()["scalars"]
    if tag not in scalar_tags:
        raise ValueError(f"Tag {tag} not found in {event_file}, available: {scalar_tags}")

    events = ea.Scalars(tag)
    steps = [e.step for e in events]
    values = [e.value for e in events]
    return steps, values


def plot_multiple_events(event_files, tags, save_path, fontsize=18, prompts_per_step=40):
    """Plot the same scalar tags from several event files, one subplot per tag.

    Every curve is EMA-smoothed (weight 0.6) and labelled with the run
    directory name. For each tag the largest *unsmoothed* value and the run
    it came from are printed.

    Args:
        event_files: Paths of the TensorBoard event files to compare.
        tags: Scalar tags; each one gets its own subplot (stacked vertically).
        save_path: Output file for the figure; its directory is created if
            missing.
        fontsize: Base font size; axis labels and legend are slightly smaller.
        prompts_per_step: Factor converting a step index to the "Seen Prompts"
            value shown on the x axis (presumably prompts consumed per
            logged step -- confirm against the training config).

    Raises:
        ValueError: If ``event_files`` is empty.
    """
    event_files = list(event_files)
    if not event_files:
        raise ValueError("plot_multiple_events needs at least one event file")

    # plt.figure(figsize=(8, 10))
    plt.figure(figsize=(6, 8))
    for i, tag in enumerate(tags):
        plt.subplot(len(tags), 1, i + 1)
        # Start from -inf so that curves lying entirely below any fixed
        # sentinel are still reported correctly.
        max_val, max_cfg = float("-inf"), None
        for event_file in event_files:
            label = _run_name(event_file)
            steps, values = read_event_scalar(event_file, tag)
            file_max = np.max(values)
            if max_val < file_max:
                max_val = file_max
                max_cfg = label
            values = smooth(values, weight=0.6)
            plt.plot(steps, values, label=label)

        plt.title(tag, fontsize=fontsize)
        plt.xlabel("Seen Prompts", fontsize=fontsize-2)
        # plt.ylabel(tag)
        # Tick positions come from the last file read; every 10th logged step
        # is labelled in thousands of prompts.
        tick_steps = steps[::10]
        plt.xticks(
            tick_steps,
            [f'{step * prompts_per_step / 1000:.1f}k' for step in tick_steps],
            fontsize=fontsize-2,
        )
        plt.legend(loc='lower right', fontsize=fontsize-4)
        plt.grid(True)
        plt.tight_layout()
        print(f"Max {tag}: {max_val:.4f} in {max_cfg}")

    _ensure_parent_dir(save_path)
    plt.savefig(save_path, dpi=400)
    # plt.show()


def plot_dpo_implicit_acc_beta():
    """Plot implicit accuracy curves of the DPO beta ablation runs.

    Runs are ordered by the integer after the last ``_`` in the run directory
    name; paths containing ``300/`` or ``700/`` are left out.
    """
    event_files = glob('exps/audio_video/ablation/dpo/beta/*/tensorboard/events.out.tfevents.*')
    event_files = sorted(event_files, key=lambda x: int(_run_name(x).split('_')[-1]))
    event_files = [path for path in event_files if '300/' not in path and '700/' not in path]
    tags = ["implicit_acc_audio", "implicit_acc_video"]
    plot_multiple_events(event_files, tags, save_path='./debug/plot/implicit_acc_beta.pdf')


def plot_dpo_implicit_acc_lr():
    """Plot implicit accuracy curves of the DPO learning-rate ablation runs.

    Raises:
        FileNotFoundError: If a learning-rate run has no event file.
    """
    event_files = []
    for lr in ['1e-5', '5e-6', '1e-6']:
        pattern = f'exps/audio_video/ablation/dpo/lr/lr_{lr}/tensorboard/events.out.tfevents.*'
        matches = glob(pattern)
        if not matches:
            raise FileNotFoundError(f"No event file matches {pattern}")
        # NOTE: if several event files match, the first one returned by glob
        # is used.
        event_files.append(matches[0])

    tags = ["implicit_acc_audio", "implicit_acc_video"]
    plot_multiple_events(event_files, tags, save_path='./debug/plot/implicit_acc_lr.pdf')


def plot_perform_radar():
    """Draw a radar chart comparing models on six metrics and save it as PDF.

    Every axis is rescaled to [0, 1] with its own ``(min, max)`` range, and
    the original-scale tick values are written along each spoke.
    """
    # Metric names and the value range mapped onto each spoke.
    labels = ["V-Quality", "A-Quality", "TV-Align", "TA-Align", "AV-Align", "AV-Sync"]
    ranges = [(0, 3.0), (3.8, 5.2), (2, 3.19), (1, 1.98), (0, 2.8), (0.4, 1.35)]

    # Scores per model, in the order of ``labels``.
    scores = {
        "JavisDiT": [1.02, 4.28, 2.6, 1.3, 1.8, 0.75],
        "UniVerse-1": [1.52, 4.09, 2.9, 1.1, 1.0, 0.80],
        "Ours": [2.47, 4.91, 3.0, 1.66, 1.92, 1.02],
        "Veo-3": [2.84, 5.11, 3.1, 1.9, 2.6, 1.28],
    }

    def normalize(values, ranges):
        """Rescale each value to [0, 1] using the range of its own axis."""
        return [
            (value - ranges[axis][0]) / (ranges[axis][1] - ranges[axis][0])
            for axis, value in enumerate(values)
        ]

    # One angle per metric; the first angle is repeated to close the polygon.
    N = len(labels)
    angles = np.linspace(0, 2 * np.pi, N, endpoint=False).tolist()
    angles += angles[:1]

    fig, ax = plt.subplots(figsize=(6, 6), subplot_kw=dict(polar=True))
    fontsize = 16
    ax.set_theta_offset(np.pi/2)  # put the first axis at the top (90 degrees)
    ax.set_theta_direction(1)  # clockwise = -1, counter-clockwise = 1

    # One closed polygon per model.
    for name, values in scores.items():
        norm_values = normalize(values, ranges)
        norm_values += norm_values[:1]
        ax.plot(angles, norm_values, label=name, linewidth=1.5 if name in ['Ours', 'Veo-3'] else 1)
        ax.fill(angles, norm_values, alpha=0.1)

    # Keep the radial grid circles but hide their (normalized) labels.
    ax.set_ylim(0, 1)
    ax.set_yticks([0.25,0.5,0.75])
    ax.set_yticklabels([])

    # Write the original-scale tick values along each spoke by hand.
    for i, angle in enumerate(angles[:-1]):
        rmin, rmax = ranges[i]
        ticks = np.linspace(rmin, rmax, 5)[1:-1]  # the 3 interior ticks
        for t in ticks:
            rt = (t - rmin) / (rmax - rmin)  # normalized radius of this tick
            ax.text(angle, rt+0.03, f"{t:.1f}", ha="center", va="center", fontsize=fontsize-2, color="gray")

    # Axis (metric) labels.
    ax.set_xticks(angles[:-1])
    labels_text = ax.set_xticklabels(labels, fontsize=fontsize, fontweight="bold")

    # Shift the labels so they do not collide with the outermost circle.
    # Labels 0 and 3 keep their default position.
    for i, (label, angle) in enumerate(zip(labels_text, angles[:-1])):
        # print(label.get_position())
        if i not in [0, 3]:
            label.set_position((angle, -0.12))
        label.set_ha('center')
        label.set_va('center')

    ax.legend(loc="upper right", bbox_to_anchor=(1.17, 1.15), fontsize=fontsize-2)
    plt.tight_layout()

    save_path = './debug/plot/perform_radar.pdf'
    _ensure_parent_dir(save_path)
    plt.savefig(save_path)


def plot_ablation_bar():
    """Draw a grouped bar chart of the LoRA-configuration ablation as PDF.

    Each metric is rescaled to [0, 1] with a hand-picked ``(min, max)`` range
    so that metrics with different scales share one y axis.
    """
    # One row of ``data`` / ``data_range`` per task, one column per model.
    tasks = ["FVD ↓", "FAD ↓" , "TV-IB ↑", "TA-IB ↑", "AV-IB ↑", "JavisScore ↑", "DeSync ↓"]
    models = [
        "A-LoRA + AV-LoRA (r=64)",
        "A-noLoRA + AV-AttnLoRA (r=64)",
        "A-noLoRA + AV-LoRA (r=64)",
        "A-noLoRA + AV-LoRA (r=32)",
        "A-noLoRA + AV-LoRA (r=128)"
    ]

    # Raw scores (each task has its own value range).
    data = np.array([
        [311.6, 223.1, 221.3, 222.5, 218.6],
        [ 5.80,  5.66,  5.51,  5.54,  5.60],
        [ 16.2,  28.1,  28.3,  28.3,  28.2],
        [ 14.2,  14.7,  15.3,  15.2,  14.7],
        [ 12.6,  18.6,  19.4,  19.2,  18.0],
        [  9.1,  14.1,  15.1,  14.7,  14.3],
        [ 96.9,  95.8,  90.1,  90.0,  90.1],
    ])
    data_range = np.array([
        [200, 350],
        [5, 6],
        [10, 35],
        [10, 18],
        [10, 25],
        [5, 20],
        [85, 100],
    ])

    # ---- Step 1: rescale every task to [0, 1] with its fixed range ----
    # norm_data = (data - data.min(axis=1, keepdims=True)) / \
    #     (data.max(axis=1, keepdims=True) - data.min(axis=1, keepdims=True))
    lower = data_range[:, :1]
    upper = data_range[:, 1:]
    norm_data = (data - lower) / (upper - lower)

    # ---- Step 2: draw ----
    x = np.arange(len(tasks))  # centre position of each task group
    bar_width = 0.15
    fontsize = 18
    colors = ["grey", "lightgrey", "royalblue", "cornflowerblue", "lightsteelblue"]
    hatches = [None, None, "//", None, None]  # hatch marks the highlighted model

    fig, ax = plt.subplots(figsize=(11.2,4))

    # Keep the grid underneath the bars and very faint.
    ax.set_axisbelow(True)
    ax.yaxis.grid(True, linestyle='--', linewidth=0.7, alpha=0.3, color='gray', zorder=0)

    for i, model in enumerate(models):
        bars = ax.bar(
            x + i*bar_width - (len(models)-1)/2*bar_width,  # centre the group on x
            norm_data[:, i],
            width=bar_width,
            label=model,
            color=colors[i],
            hatch=hatches[i],
            edgecolor="white",
            linewidth=0.5
        )
        # # Annotate each bar with its raw value
        # for j, bar in enumerate(bars):
        #     ax.text(
        #         bar.get_x() + bar.get_width()/2,
        #         bar.get_height() + 0.02,  # slightly above the bar
        #         f"{data[j, i]:.1f}",  # raw value
        #         ha="center", va="bottom", fontsize=fontsize-4
        #     )

    # ---- Step 3: styling ----
    ax.set_xticks(x)
    ax.set_xlim(x[0]-0.5, x[-1]+0.5)
    ax.set_xticklabels(tasks, fontsize=fontsize)
    ax.set_ylim(0, 1)  # data is normalized
    ax.set_yticklabels([])  # hide y tick labels

    # Legend above the axes.
    ax.legend(
        loc="upper center",
        bbox_to_anchor=(0.5, 1.3),
        ncol=int(math.ceil(len(models)/2)),
        frameon=False,
        fontsize=fontsize-3
    )

    plt.tight_layout()
    plt.subplots_adjust(left=0.02, right=0.98, top=0.8, bottom=0.1)  # final margins

    save_path = './debug/plot/lora_cfg_bar.pdf'
    _ensure_parent_dir(save_path)
    plt.savefig(save_path)


def plot_data_filtering():
    """Draw the data-filtering pipeline as a Sankey diagram and save it as PDF."""
    # Node order; the indices are referenced by the coordinates and links below.
    labels = [
        # column 1 (x=0)
        "Raw Videos",       # 0
        # column 2 (x=0.33)
        "Clean Videos",     # 1
        "Speech Videos",    # 2
        # column 3 (x=0.66)
        "HQ Videos",        # 3
        "Scoring Filters",  # 4
        # column 4 (x=1)
        "SFT",              # 5
        "DPO",              # 6
    ]

    # Node colors, in the same order as ``labels``.
    colors = [
        "purple",
        "blue", "grey",
        "darkgreen", "grey",
        "gold", "brown"
    ]

    # Explicit node coordinates: x selects the column, y the row; all in [0, 1].
    node_x = [
        0.01,        # Raw Videos
        0.33, 0.33,  # Clean Videos, Speech Videos
        0.66, 0.66,  # HQ Videos, Scoring Filters
        0.99, 0.99,  # SFT, DPO
    ]
    node_y = [
        0.3,         # Raw Videos
        0.2, 0.7,    # Clean Videos, Speech Videos
        0.1, 0.45,   # HQ Videos, Scoring Filters
        0.1, 0.3,    # SFT, DPO
    ]

    # Flows between nodes. Per the original author's note the values are
    # estimated from the thickness of a reference figure and can be replaced
    # with real counts.
    links = {
        'source': [0, 0, 1, 1, 3, 3],
        'target': [1, 2, 3, 4, 5, 6],
        'value':  [66, 34, 33, 33, 29, 4],
        # Semi-transparent grey for every link.
        'color': ['rgba(128, 128, 128, 0.3)'] * 6
    }

    # Build the figure; no ``arrangement`` is given because explicit x/y
    # coordinates are supplied.
    fig = go.Figure(data=[go.Sankey(
        node=dict(
            pad=25,
            thickness=30,
            line=dict(color="black", width=0.5),
            label=labels,
            color=colors,
            x=node_x,
            y=node_y
        ),
        link=dict(
            source=links['source'],
            target=links['target'],
            value=links['value'],
            color=links['color']
        )
    )])

    fig.update_layout(
        # title_text="视频数据处理流程桑基图",
        font_size=16,
        height=600,
        font=dict(
            family="Times New Roman",
            size=16,
            color="black"
        )
    )

    # fig.show()
    save_path = './debug/plot/data_filtering.pdf'
    _ensure_parent_dir(save_path)
    fig.write_image(save_path)


def stat_audio_data_dist():
    """Print how many training audio samples belong to each subset directory.

    Subsets are the sub-directories of the audio data root. A sample is
    counted for a subset when its ``audio_path`` contains the subset name as
    a literal substring (so a subset whose name is contained in another
    subset's name would be over-counted). Rows are printed in descending
    order of count together with their share of the summed counts.
    """
    data_root = '/mnt/HithinkOmniSSD/user_workspace/liukai4/datasets/JavisDiT/train/audio'
    df = pd.read_csv(f'{data_root}/JavisDiT_train_audio_v1.csv')

    subsets = [
        subset for subset in os.listdir(data_root)
        if osp.isdir(f'{data_root}/{subset}')
    ]

    # regex=False: directory names are literal text, not patterns.
    # na=False: rows without a path count as "no match".
    stat = np.array([
        df['audio_path'].str.contains(subset, regex=False, na=False).sum()
        for subset in subsets
    ])

    indices = np.argsort(-stat)
    subsets = [subsets[i] for i in indices]
    stat = stat[indices]

    total = stat.sum()
    # Avoid a 0/0 division when nothing matched.
    rel_stat = stat / total if total else np.zeros(len(stat))

    for subset, count, share in zip(subsets, stat, rel_stat):
        print(f'{subset:<10} {count:>5} {share:.2%}')


if __name__ == "__main__":
    # plot_dpo_implicit_acc_beta()
    # plot_dpo_implicit_acc_lr()
    # plot_perform_radar()
    plot_ablation_bar()
    # plot_data_filtering()
    # stat_audio_data_dist()
    pass