# Provenance (page header captured together with this file): uploaded by kaiw7,
# commit message "Upload folder using huggingface_hub", commit e490e7e (verified).
import os
import os.path as osp
from glob import glob
import math
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import plotly.graph_objects as go
from tensorboard.backend.event_processing import event_accumulator
# Global matplotlib styling, applied at import time to every figure in this file.
# The commented-out lines are alternative style sheets kept for quick switching.
# plt.style.use("seaborn-v0_8")  # seaborn style
# plt.style.use("ggplot")  # ggplot style
plt.style.use("classic")  # classic style
# plt.style.use("bmh")  # Bayesian Methods for Hackers style
plt.rcParams["font.family"] = "Times New Roman"
def smooth(values, weight=0.6):
    """Smooth a 1-D series with an exponential moving average (EMA).

    The running average is seeded with the first sample, so the first output
    equals ``values[0]``; every later output is
    ``previous * weight + (1 - weight) * current``.

    Args:
        values: Sequence (list, tuple or 1-D array) of numbers.
        weight: Smoothing factor, expected in [0, 1]; larger means smoother.

    Returns:
        ``np.ndarray`` with one smoothed value per input sample. An empty
        input yields an empty array instead of raising ``IndexError``.
    """
    # ``len`` (not truthiness) so that numpy arrays are accepted as well.
    if len(values) == 0:
        return np.array([])

    smoothed = []
    last = values[0]
    for v in values:
        last = last * weight + (1 - weight) * v
        smoothed.append(last)
    return np.array(smoothed)
def read_event_scalar(event_file, tag):
    """Load the scalar series recorded under ``tag`` in a TensorBoard event file.

    Args:
        event_file: Path handed to ``EventAccumulator``.
        tag: Name of the scalar to extract.

    Returns:
        A ``(steps, values)`` pair of equally long lists, in the order the
        accumulator returns the events.

    Raises:
        ValueError: If ``tag`` is not among the scalar tags of the file; the
            message lists the tags that are available.
    """
    ea = event_accumulator.EventAccumulator(event_file)
    ea.Reload()

    if tag not in ea.Tags()["scalars"]:
        raise ValueError(f"Tag {tag} not found in {event_file}, available: {ea.Tags()['scalars']}")

    steps, values = [], []
    for event in ea.Scalars(tag):
        steps.append(event.step)
        values.append(event.value)
    return steps, values
def plot_multiple_events(event_files, tags, save_path, fontsize=18,
                         smooth_weight=0.6, prompts_per_step=40, tick_stride=10):
    """Plot each tag in its own subplot, overlaying one curve per event file.

    Subplots are stacked vertically (one row per tag). Every curve is
    EMA-smoothed before plotting, while the reported maximum is taken from
    the raw (unsmoothed) values. For each tag the best raw value and the run
    it came from are printed.

    Args:
        event_files: Paths of TensorBoard event files. The legend label of a
            curve is the name of the directory two levels above its file.
        tags: Scalar tag names; one subplot is drawn per tag.
        save_path: Output image path; its parent directory is created if needed.
        fontsize: Base font size (title); axis text and legend use smaller sizes.
        smooth_weight: EMA weight forwarded to ``smooth``.
        prompts_per_step: Factor converting a step index into the number shown
            on the "Seen Prompts" axis (displayed in thousands).
        tick_stride: Keep every ``tick_stride``-th step as an x tick.

    Raises:
        ValueError: If ``event_files`` is empty.
    """
    if not event_files:
        # Previously this surfaced as a NameError on ``steps`` further down.
        raise ValueError("event_files is empty; nothing to plot")

    out_dir = osp.dirname(save_path)
    if out_dir:
        os.makedirs(out_dir, exist_ok=True)

    plt.figure(figsize=(6, 8))
    for i, tag in enumerate(tags):
        plt.subplot(len(tags), 1, i + 1)
        # -inf instead of -1 so series that are entirely below -1 still register.
        max_val, max_cfg = -math.inf, None
        tick_steps = []
        for event_file in event_files:
            label = osp.basename(osp.dirname(osp.dirname(event_file)))
            steps, values = read_event_scalar(event_file, tag)
            if len(values) == 0:
                # Tag is registered but holds no samples: nothing to draw.
                continue
            peak = np.max(values)
            if max_val < peak:
                max_val = peak
                max_cfg = label
            plt.plot(steps, smooth(values, weight=smooth_weight), label=label)
            # Ticks follow the last plotted run, as in the original behaviour.
            tick_steps = steps

        plt.title(tag, fontsize=fontsize)
        plt.xlabel("Seen Prompts", fontsize=fontsize-2)
        shown = tick_steps[::tick_stride]
        plt.xticks(
            shown,
            [f'{step * prompts_per_step / 1000:.1f}k' for step in shown],
            fontsize=fontsize-2,
        )
        plt.legend(loc='lower right', fontsize=fontsize-4)
        plt.grid(True)
        print(f"Max {tag}: {max_val:.4f} in {max_cfg}")

    # One layout pass after all subplots exist.
    plt.tight_layout()
    plt.savefig(save_path, dpi=400)
def plot_dpo_implicit_acc_beta():
    """Plot implicit-accuracy curves for the runs under the DPO ``beta`` ablation folder.

    Event files are ordered by the integer after the last ``_`` in the run
    directory name (presumably the beta value - confirm against the folder
    layout). Paths containing ``300/`` or ``700/`` are left out of the figure.
    """
    def _run_suffix(path):
        run_dir = osp.dirname(osp.dirname(path))
        return int(osp.basename(run_dir).split('_')[-1])

    candidates = glob('exps/audio_video/ablation/dpo/beta/*/tensorboard/events.out.tfevents.*')
    candidates = sorted(candidates, key=_run_suffix)

    event_files = []
    for path in candidates:
        if '300/' in path or '700/' in path:
            continue
        event_files.append(path)

    plot_multiple_events(
        event_files,
        ["implicit_acc_audio", "implicit_acc_video"],
        save_path='./debug/plot/implicit_acc_beta.pdf',
    )
def plot_dpo_implicit_acc_lr():
    """Plot implicit-accuracy curves for the three runs of the DPO learning-rate ablation.

    One event file is picked per learning rate (``1e-5``, ``5e-6``, ``1e-6``).
    When several files match, the first one in sorted order is used so the
    choice is deterministic.

    Raises:
        FileNotFoundError: If no event file exists for one of the learning
            rates (previously an opaque ``IndexError``).
    """
    event_files = []
    for lr in ['1e-5', '5e-6', '1e-6']:
        pattern = f'exps/audio_video/ablation/dpo/lr/lr_{lr}/tensorboard/events.out.tfevents.*'
        matches = sorted(glob(pattern))
        if not matches:
            raise FileNotFoundError(f"No TensorBoard event file matches {pattern!r}")
        event_files.append(matches[0])

    tags = ["implicit_acc_audio", "implicit_acc_video"]
    plot_multiple_events(event_files, tags, save_path='./debug/plot/implicit_acc_lr.pdf')
def plot_perform_radar():
    """Draw a radar chart comparing four models on six metrics and save it as a PDF.

    Each metric is rescaled to [0, 1] with its own fixed display range, the
    three inner rings are annotated with the corresponding raw values, and the
    figure is written to ``./debug/plot/perform_radar.pdf`` (the directory is
    created when missing).
    """
    # Metric axes and, per axis, the raw-value range mapped onto radius 0..1.
    labels = ["V-Quality", "A-Quality", "TV-Align", "TA-Align", "AV-Align", "AV-Sync"]
    ranges = [(0, 3.0), (3.8, 5.2), (2, 3.19), (1, 1.98), (0, 2.8), (0.4, 1.35)]
    # Example scores, one list per model in the same order as ``labels``.
    scores = {
        "JavisDiT": [1.02, 4.28, 2.6, 1.3, 1.8, 0.75],
        "UniVerse-1": [1.52, 4.09, 2.9, 1.1, 1.0, 0.80],
        "Ours": [2.47, 4.91, 3.0, 1.66, 1.92, 1.02],
        "Veo-3": [2.84, 5.11, 3.1, 1.9, 2.6, 1.28],
    }
    fontsize = 16
    save_path = './debug/plot/perform_radar.pdf'

    def normalize(values):
        """Map raw per-metric scores to [0, 1] using the matching range."""
        return [(v - lo) / (hi - lo) for v, (lo, hi) in zip(values, ranges)]

    # One angle per metric; the first angle is repeated to close each polygon.
    angles = np.linspace(0, 2 * np.pi, len(labels), endpoint=False).tolist()
    closed_angles = angles + angles[:1]

    fig, ax = plt.subplots(figsize=(6, 6), subplot_kw=dict(polar=True))
    ax.set_theta_offset(np.pi/2)  # put angle 0 at the top of the chart
    ax.set_theta_direction(1)  # 1 = counter-clockwise, -1 = clockwise

    # One outlined, lightly filled polygon per model.
    for name, values in scores.items():
        radii = normalize(values)
        radii += radii[:1]
        ax.plot(closed_angles, radii, label=name, linewidth=1.5 if name in ['Ours', 'Veo-3'] else 1)
        ax.fill(closed_angles, radii, alpha=0.1)

    # Keep the three inner rings but hide the default (normalized) radial labels.
    ax.set_ylim(0, 1)
    ax.set_yticks([0.25,0.5,0.75])
    ax.set_yticklabels([])

    # Annotate every axis with its own raw values at the three rings.
    for angle, (rmin, rmax) in zip(angles, ranges):
        ticks = np.linspace(rmin, rmax, 5)[1:-1]  # 5 points minus both ends = 3 ticks
        for t in ticks:
            rt = (t - rmin) / (rmax - rmin)  # back to normalized radius
            ax.text(angle, rt+0.03, f"{t:.1f}", ha="center", va="center", fontsize=fontsize-2, color="gray")

    # Metric names around the circle.
    ax.set_xticks(angles)
    labels_text = ax.set_xticklabels(labels, fontsize=fontsize, fontweight="bold")

    # Shift the metric names so they do not collide with the outer ring.
    # Labels 0 and 3 keep their default position.
    for i, (label, angle) in enumerate(zip(labels_text, angles)):
        if i not in [0, 3]:
            label.set_position((angle, -0.12))
        label.set_ha('center')
        label.set_va('center')

    ax.legend(loc="upper right", bbox_to_anchor=(1.17, 1.15), fontsize=fontsize-2)
    plt.tight_layout()

    # savefig does not create missing directories.
    os.makedirs(osp.dirname(save_path), exist_ok=True)
    plt.savefig(save_path)
def plot_ablation_bar(annotate_values=False):
    """Draw a grouped bar chart of the LoRA-configuration ablation and save it as a PDF.

    Seven metrics are shown on the x axis with one bar per model
    configuration. Because the metrics live on different scales, every row is
    rescaled to [0, 1] with a fixed per-metric display range before plotting.
    The figure is written to ``./debug/plot/lora_cfg_bar.pdf`` (the directory
    is created when missing).

    Args:
        annotate_values: When true, write the raw (un-normalized) value above
            each bar. Off by default, matching the original figure.
    """
    # Seven metrics (rows of ``data``); the arrow marks the preferred direction.
    tasks = ["FVD ↓", "FAD ↓" , "TV-IB ↑", "TA-IB ↑", "AV-IB ↑", "JavisScore ↑", "DeSync ↓"]
    # Five model configurations (columns of ``data``).
    models = [
        "A-LoRA + AV-LoRA (r=64)",
        "A-noLoRA + AV-AttnLoRA (r=64)",
        "A-noLoRA + AV-LoRA (r=64)",
        "A-noLoRA + AV-LoRA (r=32)",
        "A-noLoRA + AV-LoRA (r=128)"
    ]
    # Raw scores; every metric has its own value range.
    data = np.array([
        [311.6, 223.1, 221.3, 222.5, 218.6],
        [  5.80,  5.66,  5.51,  5.54,  5.60],
        [ 16.2,  28.1,  28.3,  28.3,  28.2],
        [ 14.2,  14.7,  15.3,  15.2,  14.7],
        [ 12.6,  18.6,  19.4,  19.2,  18.0],
        [  9.1,  14.1,  15.1,  14.7,  14.3],
        [ 96.9,  95.8,  90.1,  90.0,  90.1],
    ])
    # Per-metric [low, high] display range used for normalization.
    data_range = np.array([
        [200, 350],
        [5, 6],
        [10, 35],
        [10, 18],
        [10, 25],
        [5, 20],
        [85, 100],
    ])
    save_path = './debug/plot/lora_cfg_bar.pdf'

    # Step 1: normalize each metric to [0, 1] with the fixed ranges above
    # (not with the per-row min/max).
    low, high = data_range[:, :1], data_range[:, 1:]
    norm_data = (data - low) / (high - low)

    # Step 2: draw the grouped bars.
    x = np.arange(len(tasks))  # one group per metric
    bar_width = 0.15
    fontsize = 18
    colors = ["grey", "lightgrey", "royalblue", "cornflowerblue", "lightsteelblue"]
    hatches = [None, None, "//", None, None]  # hatch highlights the third configuration

    fig, ax = plt.subplots(figsize=(11.2,4))

    # Keep a very faint horizontal grid underneath the bars.
    ax.set_axisbelow(True)
    ax.yaxis.grid(True, linestyle='--', linewidth=0.7, alpha=0.3, color='gray', zorder=0)

    group_shift = (len(models)-1)/2*bar_width  # centers each group on its tick
    for i, model in enumerate(models):
        bars = ax.bar(
            x + i*bar_width - group_shift,
            norm_data[:, i],
            width=bar_width,
            label=model,
            color=colors[i],
            hatch=hatches[i],
            edgecolor="white",
            linewidth=0.5
        )
        if annotate_values:
            # Label every bar with its raw value, slightly above the bar top.
            for j, bar in enumerate(bars):
                ax.text(
                    bar.get_x() + bar.get_width()/2,
                    bar.get_height() + 0.02,
                    f"{data[j, i]:.1f}",
                    ha="center", va="bottom", fontsize=fontsize-4
                )

    # Step 3: styling.
    ax.set_xticks(x)
    ax.set_xlim(x[0]-0.5, x[-1]+0.5)
    ax.set_xticklabels(tasks, fontsize=fontsize)
    ax.set_ylim(0, 1)  # data is normalized
    ax.set_yticklabels([])  # normalized heights carry no unit, so hide the labels

    # Legend above the axes.
    ax.legend(
        loc="upper center", bbox_to_anchor=(0.5, 1.3),
        ncol=int(math.ceil(len(models)/2)), frameon=False, fontsize=fontsize-3
    )
    plt.tight_layout()
    plt.subplots_adjust(left=0.02, right=0.98, top=0.8, bottom=0.1)  # final margins

    # savefig does not create missing directories.
    os.makedirs(osp.dirname(save_path), exist_ok=True)
    plt.savefig(save_path)
def plot_data_filtering():
    """Draw a Sankey diagram of the video data-filtering pipeline and save it as a PDF.

    Nodes are placed at hand-picked (x, y) coordinates in four columns and
    connected by six semi-transparent grey links. The figure is written to
    ``./debug/plot/data_filtering.pdf`` (the directory is created when
    missing) through ``fig.write_image``.
    """
    # Node order matters: links and coordinates below refer to these indices.
    labels = [
        # column 1 (x=0)
        "Raw Videos",       # 0
        # column 2 (x=0.33)
        "Clean Videos",     # 1
        "Speech Videos",    # 2
        # column 3 (x=0.66)
        "HQ Videos",        # 3
        "Scoring Filters",  # 4
        # column 4 (x=1)
        "SFT",              # 5
        "DPO",              # 6
    ]
    # Node colors, in the same order as ``labels``.
    colors = [
        "purple", "blue", "grey", "darkgreen", "grey", "gold", "brown"
    ]
    # Manual node coordinates: x selects the column, y the vertical position;
    # all values lie between 0 and 1.
    node_x = [
        0.01,        # Raw Videos
        0.33, 0.33,  # Clean Videos, Speech Videos
        0.66, 0.66,  # HQ Videos, Scoring Filters
        0.99, 0.99,  # SFT, DPO
    ]
    node_y = [
        0.3,         # Raw Videos
        0.2, 0.7,    # Clean Videos, Speech Videos
        0.1, 0.45,   # HQ Videos, Scoring Filters
        0.1, 0.3,    # SFT, DPO
    ]
    # Flows between nodes. Per the original author's note, the values were
    # estimated from the flow widths of a reference figure; replace them with
    # real counts when available.
    link_source = [0, 0, 1, 1, 3, 3]
    link_target = [1, 2, 3, 4, 5, 6]
    link_value = [66, 34, 33, 33, 29, 4]
    # Same translucent grey for every link; sized from the link list.
    link_color = ['rgba(128, 128, 128, 0.3)'] * len(link_source)
    save_path = './debug/plot/data_filtering.pdf'

    fig = go.Figure(data=[go.Sankey(
        node=dict(
            pad=25,
            thickness=30,
            line=dict(color="black", width=0.5),
            label=labels,
            color=colors,
            # explicit coordinates
            x=node_x,
            y=node_y
        ),
        link=dict(
            source=link_source,
            target=link_target,
            value=link_value,
            color=link_color
        )
    )])
    fig.update_layout(
        height=600,
        # Font family, size and color are set once here; the separate
        # ``font_size`` argument set the same size and was redundant.
        font=dict(
            family="Times New Roman",
            size=16,
            color="black"
        )
    )

    os.makedirs(osp.dirname(save_path), exist_ok=True)
    fig.write_image(save_path)
def stat_audio_data_dist(data_root='/mnt/HithinkOmniSSD/user_workspace/liukai4/datasets/JavisDiT/train/audio'):
    """Print how many rows of the audio training CSV fall into each subset.

    A subset is any sub-directory of ``data_root``. A row of
    ``JavisDiT_train_audio_v1.csv`` is attributed to a subset when its
    ``audio_path`` contains the subset name as a literal substring. Subsets
    are printed in descending order of row count as
    ``name  count  share-of-total``.

    Args:
        data_root: Directory holding the subset folders and the CSV file.
            Defaults to the original hard-coded dataset location.
    """
    df = pd.read_csv(osp.join(data_root, 'JavisDiT_train_audio_v1.csv'))

    subsets = [
        name for name in os.listdir(data_root)
        if osp.isdir(osp.join(data_root, name))
    ]

    # regex=False: directory names are literal text, not patterns, so names
    # containing characters such as '+' or '.' are matched correctly.
    counts = np.array(
        [df['audio_path'].str.contains(name, regex=False).sum() for name in subsets],
        dtype=np.int64,
    )

    # Largest subset first; a stable sort keeps ties in listing order.
    order = np.argsort(-counts, kind='stable')
    total = counts.sum()
    for idx in order:
        # Avoid NaN percentages when nothing matched at all.
        ratio = counts[idx] / total if total else 0.0
        print(f'{subsets[idx]:<10} {counts[idx]:>5} {ratio:.2%}')
if __name__ == "__main__":
    # Script entry point: uncomment the figure(s) or statistics to regenerate.
    # Only the LoRA-configuration bar chart is enabled at the moment.
    # plot_dpo_implicit_acc_beta()
    # plot_dpo_implicit_acc_lr()
    # plot_perform_radar()
    plot_ablation_bar()
    # plot_data_filtering()
    # stat_audio_data_dist()