Upload folder using huggingface_hub

e490e7e verified about 2 months ago

12.9 kB

	import os
	import os.path as osp
	from glob import glob
	import math
	import numpy as np
	import pandas as pd
	import matplotlib.pyplot as plt
	import plotly.graph_objects as go
	from tensorboard.backend.event_processing import event_accumulator

	# Global matplotlib appearance shared by every figure drawn in this script.
	# The commented-out lines are alternative styles kept for quick switching.
	# plt.style.use("seaborn-v0_8") # seaborn style
	# plt.style.use("ggplot") # ggplot style
	plt.style.use("classic") # classic style
	# plt.style.use("bmh") # "Bayesian Methods for Hackers" style
	plt.rcParams["font.family"] = "Times New Roman"


	def smooth(values, weight=0.6):
	    """
	    Smooth a sequence with an exponential moving average (EMA).

	    The running average is seeded with the first sample, so the first output
	    always equals the first input.

	    Args:
	        values: Sequence (list, tuple or 1-D array) of numbers.
	        weight: Smoothing factor, intended to lie in [0, 1]; the larger it is,
	            the smoother the result.

	    Returns:
	        np.ndarray holding one smoothed value per input value. An empty input
	        yields an empty array.
	    """
	    if len(values) == 0:
	        # There is no first sample to seed the average with.
	        return np.array([])

	    smoothed = []
	    last = values[0]
	    for v in values:
	        last = last * weight + (1 - weight) * v
	        smoothed.append(last)
	    return np.array(smoothed)


	def read_event_scalar(event_file, tag):
	    """
	    Load the scalar series logged under ``tag`` from a TensorBoard event file.

	    Args:
	        event_file: Path handed to ``event_accumulator.EventAccumulator``.
	        tag: Name of the scalar tag to read.

	    Returns:
	        A ``(steps, values)`` pair of lists, one entry per logged scalar event.

	    Raises:
	        ValueError: If ``tag`` is not among the scalar tags of the file; the
	            message lists the tags that are available.
	    """
	    ea = event_accumulator.EventAccumulator(event_file)
	    ea.Reload()
	    if tag in ea.Tags()["scalars"]:
	        steps, values = [], []
	        for event in ea.Scalars(tag):
	            steps.append(event.step)
	            values.append(event.value)
	        return steps, values
	    raise ValueError(f"Tag {tag} not found in {event_file}, available: {ea.Tags()['scalars']}")


	def plot_multiple_events(event_files, tags, save_path, fontsize=18,
	                         smooth_weight=0.6, prompts_per_step=40, tick_stride=10):
	    """
	    Plot one subplot per tag, overlaying the same scalar from several runs.

	    Each event file contributes one curve to every subplot. A curve is
	    labelled with the name of the directory two levels above its event file.
	    Curves are EMA-smoothed for display, while the maximum printed for each
	    tag is taken from the raw (unsmoothed) values.

	    Args:
	        event_files: Non-empty sequence of TensorBoard event file paths.
	        tags: Scalar tags; one vertically stacked subplot is drawn per tag.
	        save_path: Output image path. Its parent directory is created when it
	            does not exist yet.
	        fontsize: Title font size; axis label and ticks use ``fontsize - 2``
	            and the legend uses ``fontsize - 4``.
	        smooth_weight: EMA weight forwarded to ``smooth``.
	        prompts_per_step: Factor converting a logged step into the number shown
	            on the "Seen Prompts" axis.
	        tick_stride: Every ``tick_stride``-th logged step receives an x tick.

	    Raises:
	        ValueError: If ``event_files`` is empty.
	    """
	    if len(event_files) == 0:
	        # Without any run there is no step axis to build ticks from.
	        raise ValueError("event_files is empty; there is nothing to plot")

	    out_dir = osp.dirname(save_path)
	    if out_dir:
	        os.makedirs(out_dir, exist_ok=True)

	    plt.figure(figsize=(6, 8))
	    for i, tag in enumerate(tags):
	        plt.subplot(len(tags), 1, i + 1)
	        # Start from -inf so that series which never exceed -1 are still reported.
	        max_val, max_cfg = float("-inf"), None
	        for event_file in event_files:
	            label = osp.basename(osp.dirname(osp.dirname(event_file)))
	            steps, values = read_event_scalar(event_file, tag)
	            run_max = np.max(values)
	            if max_val < run_max:
	                max_val, max_cfg = run_max, label
	            plt.plot(steps, smooth(values, weight=smooth_weight), label=label)

	        plt.title(tag, fontsize=fontsize)
	        plt.xlabel("Seen Prompts", fontsize=fontsize - 2)
	        # Tick positions come from the steps of the last run that was read.
	        tick_steps = steps[::tick_stride]
	        tick_labels = [f'{step * prompts_per_step / 1000:.1f}k' for step in tick_steps]
	        plt.xticks(tick_steps, tick_labels, fontsize=fontsize - 2)
	        plt.legend(loc='lower right', fontsize=fontsize - 4)
	        plt.grid(True)
	        print(f"Max {tag}: {max_val:.4f} in {max_cfg}")

	    # Laying out once after all subplots exist gives the same final layout.
	    plt.tight_layout()
	    plt.savefig(save_path, dpi=400)


	def plot_dpo_implicit_acc_beta():
	    """
	    Plot the implicit audio/video accuracy curves of the DPO beta ablation.

	    Event files are collected from every run directory below
	    ``exps/audio_video/ablation/dpo/beta``, ordered by the integer that follows
	    the last ``_`` in the run directory name, and the runs whose path contains
	    ``300/`` or ``700/`` are left out. The figure is written to
	    ``./debug/plot/implicit_acc_beta.pdf``.
	    """
	    def run_index(path):
	        # The run directory sits two levels above the event file.
	        run_dir = osp.basename(osp.dirname(osp.dirname(path)))
	        return int(run_dir.split('_')[-1])

	    # NOTE(review): the extracted source read 'beta//tensorboard/events.out.tfevents.',
	    # which cannot match any run; the wildcards are restored to mirror the
	    # pattern used by the learning-rate ablation -- confirm against the repo.
	    event_files = glob('exps/audio_video/ablation/dpo/beta/*/tensorboard/events.out.tfevents.*')
	    event_files = sorted(event_files, key=run_index)
	    event_files = [path for path in event_files if '300/' not in path and '700/' not in path]
	    tags = ["implicit_acc_audio", "implicit_acc_video"]
	    plot_multiple_events(event_files, tags, save_path='./debug/plot/implicit_acc_beta.pdf')


	def plot_dpo_implicit_acc_lr():
	    """
	    Plot the implicit audio/video accuracy curves of the DPO learning-rate ablation.

	    For each learning rate the first event file matched by glob below
	    ``exps/audio_video/ablation/dpo/lr/lr_<lr>/tensorboard`` is used. The
	    figure is written to ``./debug/plot/implicit_acc_lr.pdf``.
	    """
	    event_files = []
	    for lr in ['1e-5', '5e-6', '1e-6']:
	        matches = glob(f'exps/audio_video/ablation/dpo/lr/lr_{lr}/tensorboard/events.out.tfevents.*')
	        # Indexing raises IndexError when a run has no event file.
	        event_files.append(matches[0])
	    plot_multiple_events(
	        event_files,
	        ["implicit_acc_audio", "implicit_acc_video"],
	        save_path='./debug/plot/implicit_acc_lr.pdf',
	    )


	def plot_perform_radar():
	    """
	    Draw a radar chart comparing four models on six metrics.

	    Every metric is rescaled from its own ``(min, max)`` range to a radius in
	    [0, 1] so that axes with different scales share one polar plot. The
	    original (un-normalized) tick values are written along each spoke. The
	    figure is saved to ``./debug/plot/perform_radar.pdf``.
	    """
	    # Metric axes and the value range shown on each of them.
	    labels = ["V-Quality", "A-Quality", "TV-Align", "TA-Align", "AV-Align", "AV-Sync"]
	    ranges = [(0, 3.0), (3.8, 5.2), (2, 3.19), (1, 1.98), (0, 2.8), (0.4, 1.35)]

	    # Example scores, one list per model in the order of `labels`.
	    scores = {
	        "JavisDiT": [1.02, 4.28, 2.6, 1.3, 1.8, 0.75],
	        "UniVerse-1": [1.52, 4.09, 2.9, 1.1, 1.0, 0.80],
	        "Ours": [2.47, 4.91, 3.0, 1.66, 1.92, 1.02],
	        "Veo-3": [2.84, 5.11, 3.1, 1.9, 2.6, 1.28],
	    }

	    def normalize(values, ranges, labels):
	        # Rescale each value with the range of the axis at the same index.
	        scaled = []
	        for axis, value in enumerate(values):
	            low, high = ranges[axis]
	            scaled.append((value - low) / (high - low))
	        return scaled

	    # One spoke per metric; the first angle is repeated to close the polygon.
	    N = len(labels)
	    spoke_angles = np.linspace(0, 2 * np.pi, N, endpoint=False).tolist()
	    angles = spoke_angles + spoke_angles[:1]

	    fig, ax = plt.subplots(figsize=(6, 6), subplot_kw={"polar": True})
	    fontsize = 16

	    ax.set_theta_offset(np.pi / 2)  # rotate the start angle by pi/2 (90 degrees)
	    ax.set_theta_direction(1)  # clockwise = -1, counter-clockwise = 1

	    # One outline plus a translucent fill per model.
	    highlighted = ['Ours', 'Veo-3']
	    for name, values in scores.items():
	        closed = normalize(values, ranges, labels)
	        closed = closed + closed[:1]
	        if name in highlighted:
	            width = 1.5
	        else:
	            width = 1
	        ax.plot(angles, closed, label=name, linewidth=width)
	        ax.fill(angles, closed, alpha=0.1)

	    # Keep the radial grid circles but hide their default tick labels.
	    ax.set_ylim(0, 1)
	    ax.set_yticks([0.25, 0.5, 0.75])
	    ax.set_yticklabels([])

	    # Write the original-scale tick values by hand along every spoke.
	    for axis, angle in enumerate(angles[:-1]):
	        rmin, rmax = ranges[axis]
	        inner_ticks = np.linspace(rmin, rmax, 5)[1:-1]  # 5 - 2 = 3 ticks
	        for tick in inner_ticks:
	            radius = (tick - rmin) / (rmax - rmin)  # same normalization as the data
	            ax.text(angle, radius + 0.03, f"{tick:.1f}", ha="center", va="center", fontsize=fontsize-2, color="gray")

	    # Metric names at the end of each spoke.
	    ax.set_xticks(angles[:-1])
	    labels_text = ax.set_xticklabels(labels, fontsize=fontsize, fontweight="bold")

	    # Re-position the metric names so they do not collide with the outer
	    # circle; labels 0 and 3 keep their default position.
	    # NOTE(review): the extracted source lost its indentation; the alignment
	    # calls are assumed to apply to every label -- confirm.
	    for idx, (tick_label, theta) in enumerate(zip(labels_text, angles[:-1])):
	        if idx not in [0, 3]:
	            tick_label.set_position((theta, -0.12))
	        tick_label.set_ha('center')
	        tick_label.set_va('center')

	    ax.legend(loc="upper right", bbox_to_anchor=(1.17, 1.15), fontsize=fontsize-2)
	    plt.tight_layout()
	    plt.savefig('./debug/plot/perform_radar.pdf')


	def plot_ablation_bar():
	    """
	    Draw a grouped bar chart for the LoRA configuration ablation.

	    Seven metrics with very different scales are compared across five model
	    configurations. Each metric is rescaled with its own fixed display range
	    (``data_range``) so that all bars fit a common [0, 1] y-axis. The figure
	    is saved to ``./debug/plot/lora_cfg_bar.pdf``.
	    """
	    # Seven metrics; the arrow marks whether lower or higher is better.
	    tasks = ["FVD ↓", "FAD ↓" , "TV-IB ↑", "TA-IB ↑", "AV-IB ↑", "JavisScore ↑", "DeSync ↓"]
	    models = [
	        "A-LoRA + AV-LoRA (r=64)",
	        "A-noLoRA + AV-AttnLoRA (r=64)",
	        "A-noLoRA + AV-LoRA (r=64)",
	        "A-noLoRA + AV-LoRA (r=32)",
	        "A-noLoRA + AV-LoRA (r=128)"
	    ]

	    # Raw scores: one row per metric, one column per model.
	    data = np.array([
	        [311.6, 223.1, 221.3, 222.5, 218.6],
	        [ 5.80, 5.66, 5.51, 5.54, 5.60],
	        [ 16.2, 28.1, 28.3, 28.3, 28.2],
	        [ 14.2, 14.7, 15.3, 15.2, 14.7],
	        [ 12.6, 18.6, 19.4, 19.2, 18.0],
	        [ 9.1, 14.1, 15.1, 14.7, 14.3],
	        [ 96.9, 95.8, 90.1, 90.0, 90.1],
	    ])
	    # Display range (min, max) used to rescale each metric row.
	    data_range = np.array([
	        [200, 350],
	        [5, 6],
	        [10, 35],
	        [10, 18],
	        [10, 25],
	        [5, 20],
	        [85, 100],
	    ])

	    # ---- Step 1: rescale every metric to [0, 1] with its display range ----
	    # (a per-row min/max normalization was used previously)
	    # norm_data = (data - data.min(axis=1, keepdims=True)) / \
	    #     (data.max(axis=1, keepdims=True) - data.min(axis=1, keepdims=True))
	    norm_data = (data - data_range[:, :1]) / (data_range[:, 1:] - data_range[:, :1])

	    # ---- Step 2: draw ----
	    x = np.arange(len(tasks))  # group position of every metric
	    bar_width = 0.15
	    fontsize = 18

	    colors = ["grey", "lightgrey", "royalblue", "cornflowerblue", "lightsteelblue"]
	    hatches = [None, None, "//", None, None]  # hatch highlights the third model

	    fig, ax = plt.subplots(figsize=(11.2, 4))

	    # Keep the grid below the bars and very faint.
	    ax.set_axisbelow(True)
	    ax.yaxis.grid(True, linestyle='--', linewidth=0.7, alpha=0.3, color='gray', zorder=0)

	    # Offset of each model's bar so that a group is centred on its tick.
	    # The extracted source had lost the `*` operators in this expression.
	    offsets = (np.arange(len(models)) - (len(models) - 1) / 2) * bar_width
	    for i, model in enumerate(models):
	        ax.bar(
	            x + offsets[i],
	            norm_data[:, i],
	            width=bar_width,
	            label=model,
	            color=colors[i],
	            hatch=hatches[i],
	            edgecolor="white",
	            linewidth=0.5
	        )
	        # Optional: annotate every bar with its raw value.
	        # for j, bar in enumerate(bars):
	        #     ax.text(
	        #         bar.get_x() + bar.get_width()/2,
	        #         bar.get_height() + 0.02,  # slightly above the bar
	        #         f"{data[j, i]:.1f}",  # raw value
	        #         ha="center", va="bottom", fontsize=fontsize-4
	        #     )

	    # ---- Step 3: styling ----
	    ax.set_xticks(x)
	    ax.set_xlim(x[0]-0.5, x[-1]+0.5)
	    ax.set_xticklabels(tasks, fontsize=fontsize)
	    ax.set_ylim(0, 1)  # values are normalized
	    ax.set_yticklabels([])  # hide y tick labels

	    # Legend above the axes.
	    ax.legend(
	        loc="upper center", bbox_to_anchor=(0.5, 1.3),
	        ncol=int(math.ceil(len(models)/2)), frameon=False, fontsize=fontsize-3
	    )

	    plt.tight_layout()
	    plt.subplots_adjust(left=0.02, right=0.98, top=0.8, bottom=0.1)  # tune the margins
	    plt.savefig('./debug/plot/lora_cfg_bar.pdf')


	def plot_data_filtering():
	    """
	    Draw a Sankey diagram of the video data filtering pipeline.

	    Seven nodes are placed at hand-picked coordinates in four columns and
	    connected by six flows. The figure is written to
	    ``./debug/plot/data_filtering.pdf``.
	    """
	    # Node order matters: links refer to nodes by index, and the colours and
	    # coordinates below follow the same order.
	    labels = [
	        "Raw Videos",       # 0, column 1
	        "Clean Videos",     # 1, column 2
	        "Speech Videos",    # 2, column 2
	        "HQ Videos",        # 3, column 3
	        "Scoring Filters",  # 4, column 3
	        "SFT",              # 5, column 4
	        "DPO",              # 6, column 4
	    ]
	    colors = [
	        "purple", "blue", "grey", "darkgreen", "grey", "gold", "brown"
	    ]

	    # Manual node coordinates: x selects the column, y the row; all values
	    # lie between 0 and 1.
	    node_x = [
	        0.01,        # Raw Videos
	        0.33, 0.33,  # Clean Videos, Speech Videos
	        0.66, 0.66,  # HQ Videos, Scoring Filters
	        0.99, 0.99,  # SFT, DPO
	    ]
	    node_y = [
	        0.3,         # Raw Videos
	        0.2, 0.7,    # Clean Videos, Speech Videos
	        0.1, 0.45,   # HQ Videos, Scoring Filters
	        0.1, 0.3,    # SFT, DPO
	    ]

	    # Flows between nodes. Per the author's note, the values were estimated
	    # from the flow widths of a reference figure and can be replaced with
	    # real data. All links share one translucent grey.
	    links = {
	        'source': [0, 0, 1, 1, 3, 3],
	        'target': [1, 2, 3, 4, 5, 6],
	        'value': [66, 34, 33, 33, 29, 4],
	        'color': ['rgba(128, 128, 128, 0.3)'] * 6
	    }

	    node_spec = dict(
	        pad=25,
	        thickness=30,
	        line=dict(color="black", width=0.5),
	        label=labels,
	        color=colors,
	        # explicit coordinates
	        x=node_x,
	        y=node_y
	    )
	    link_spec = dict(
	        source=links['source'],
	        target=links['target'],
	        value=links['value'],
	        color=links['color']
	    )
	    sankey = go.Sankey(node=node_spec, link=link_spec)
	    fig = go.Figure(data=[sankey])

	    fig.update_layout(
	        font_size=16,  # base font size
	        height=600,
	        font=dict(
	            family="Times New Roman",  # font family
	            size=16,  # font size
	            color="black"  # font colour
	        )
	    )

	    # fig.show()
	    fig.write_image('./debug/plot/data_filtering.pdf')


	def stat_audio_data_dist(
	    data_root='/mnt/HithinkOmniSSD/user_workspace/liukai4/datasets/JavisDiT/train/audio',
	    csv_name='JavisDiT_train_audio_v1.csv',
	):
	    """
	    Print how many metadata rows belong to each subset directory.

	    Every sub-directory of ``data_root`` is treated as a subset. A row of the
	    CSV counts towards a subset when its ``audio_path`` contains the subset
	    name as a literal substring. Subsets are printed in descending order of
	    their count, together with their share of the summed counts.

	    Args:
	        data_root: Directory holding the subset folders and the metadata CSV.
	        csv_name: File name of the metadata CSV inside ``data_root``; it must
	            provide an ``audio_path`` column.
	    """
	    df = pd.read_csv(f'{data_root}/{csv_name}')
	    subsets = [name for name in os.listdir(data_root) if osp.isdir(f'{data_root}/{name}')]

	    # regex=False: directory names are literal text, not patterns (a name such
	    # as "a.b" must not match "axb"); na=False: rows without a path count as
	    # no match.
	    counts = np.array(
	        [df['audio_path'].str.contains(name, regex=False, na=False).sum() for name in subsets],
	        dtype=np.int64,
	    )
	    # Stable sort keeps the listing order among subsets with equal counts.
	    order = np.argsort(-counts, kind='stable')
	    total = counts.sum()

	    for idx in order:
	        # Avoid a 0/0 division when no row matches any subset.
	        share = counts[idx] / total if total else 0.0
	        print(f'{subsets[idx]:<10} {counts[idx]:>5} {share:.2%}')


	if __name__ == "__main__":
	    # Script entry point: exactly one figure is regenerated per run.
	    # Switch figures by (un)commenting the calls below.
	    # plot_dpo_implicit_acc_beta()
	    # plot_dpo_implicit_acc_lr()
	    # plot_perform_radar()
	    plot_ablation_bar()
	    # plot_data_filtering()
	    # stat_audio_data_dist()