from spaces import GPU
import gradio as gr
import torch
import os
import pandas as pd
from types import SimpleNamespace

from extractor.extract_rf_feats import VideoDataset_feature
from extractor.extract_slowfast_clip import SlowFast, extract_features_slowfast_pool
from extractor.extract_swint_clip import SwinT, extract_features_swint_pool
from model_regression import Mlp, preprocess_data
from demo_test import evaluate_video_quality, load_model, get_transform
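

# Gradio demo for DIVA-VQA: extract SlowFast and Swin Transformer features from an
# uploaded video, then regress a perceptual quality score with the MLP head.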
@GPU  # ZeroGPU: a GPU is attached only while a decorated call runs (restores the otherwise-unused GPU import)
def run_diva_vqa(video_path, is_finetune, train_data_name, test_data_name, network_name):
    if not os.path.exists(video_path):
        return "❌ No video uploaded or the uploaded file has expired. Please upload again."
    print("CUDA available:", torch.cuda.is_available())
    if torch.cuda.is_available():  # current_device() raises when no GPU is present
        print("Current device:", torch.cuda.current_device())
    config = SimpleNamespace(**{
        'select_criteria': 'byrmse',
        'is_finetune': is_finetune,
        'save_path': 'model/',
        'train_data_name': train_data_name,
        'test_data_name': test_data_name,
        'test_video_path': video_path,
        'network_name': network_name,
        'num_workers': 0,
        'resize': 224,
        'patch_size': 16,
        'target_size': 224,
        'model_name': 'Mlp',
    })
    print(config.test_video_path)
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    # test demo video
    resize_transform = get_transform(config.resize)
    # number of patch_size x patch_size patches tiling a target_size x target_size frame (here 14 * 14 = 196)
    top_n = (config.target_size // config.patch_size) ** 2
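
    # Wrap the single uploaded video in a one-row DataFrame so it can reuse the batch dataset pipeline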
    data = {'vid': [os.path.splitext(os.path.basename(config.test_video_path))[0]],
            'test_data_name': [config.test_data_name],
            'test_video_path': [config.test_video_path]}
    videos_dir = os.path.dirname(config.test_video_path)
    test_df = pd.DataFrame(data)
    print(test_df.T)

    dataset = VideoDataset_feature(videos_dir, test_df, resize_transform, config.resize,
                                   config.test_data_name, config.patch_size, config.target_size, top_n)
    data_loader = torch.utils.data.DataLoader(
        dataset, batch_size=1, shuffle=False,
        num_workers=min(config.num_workers, os.cpu_count()), pin_memory=True
    )
    # load models to device
    model_slowfast = SlowFast().to(device)
    if config.network_name == 'diva-vqa':
        model_swint = SwinT(global_pool='avg').to(device)  # 'swin_base_patch4_window7_224.ms_in22k_ft_in1k'
        input_features = 9984
    elif config.network_name == 'diva-vqa_large':
        model_swint = SwinT(model_name='swin_large_patch4_window7_224', global_pool='avg', pretrained=True).to(device)
        input_features = 11520
    else:
        raise ValueError(f"Unsupported network_name: {config.network_name}")  # avoid a NameError below
    model_mlp = load_model(config, device, input_features)
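
    # Score the video; the Gradio temp upload is removed whether or not scoring succeeds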
    try:
        score, runtime = evaluate_video_quality(config, data_loader, model_slowfast, model_swint, model_mlp, device)
        return f"Predicted Quality Score: {score:.4f} (in {runtime:.2f}s)"
    except Exception as e:
        return f"❌ Error: {e}"
    finally:
        if "gradio" in video_path and os.path.exists(video_path):
            os.remove(video_path)


def toggle_finetune_visibility(train_dataset):
    """Show the finetune checkbox only when the train dataset is lsvq_train."""
    return gr.update(visible=(train_dataset == "lsvq_train"))


def update_test_dataset(is_finetune, train_dataset, current_test_value):
    if train_dataset != "lsvq_train":
        msg = f"Intra-dataset experiment → test dataset is automatically set to **{train_dataset}**."
        return gr.update(value=train_dataset, visible=False), gr.update(value=msg, visible=True)
    else:
        return gr.update(visible=is_finetune, value=current_test_value), gr.update(value="", visible=False)
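

# Build the Gradio UI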
with gr.Blocks() as demo:
    gr.Markdown("# 📹 DIVA-VQA Online Demo")
    gr.Markdown(
        "Upload a short video and get the predicted perceptual quality score using the DIVA-VQA model. "
        "You can try our test video from KoNViD-1k: "
        "<a href='https://huggingface.co/spaces/xinyiW915/DIVA-VQA/blob/main/ugc_original_videos/5636101558_540p.mp4' target='_blank'>demo video</a>. "
        "<br><br>"
        # "⚠️ This demo is currently running on <strong>Hugging Face CPU Basic</strong>: 2 vCPU • 16 GB RAM."
        "⚠️ This demo is currently running on <strong>Hugging Face ZeroGPU Space</strong>: Dynamic resources (NVIDIA A100)."
    )

    with gr.Row():
        with gr.Column(scale=2):
            video_input = gr.Video(label="Upload a Video (e.g. mp4)")
            train_dataset = gr.Dropdown(
                label="Train Dataset",
                choices=["cvd_2014", "konvid_1k", "live_vqc", "lsvq_train", "youtube_ugc"],
                value="lsvq_train"
            )
            is_finetune_checkbox = gr.Checkbox(label="Use Finetuning?", value=False, visible=True)
            test_dataset = gr.Dropdown(
                label="Test Dataset for Finetuning",
                choices=["cvd_2014", "konvid_1k", "live_vqc", "lsvq_test", "lsvq_test_1080p", "youtube_ugc"],
                value="konvid_1k",
                visible=False
            )
            mode_message = gr.Markdown("", visible=False)
            model_dropdown = gr.Dropdown(
                label="Our Models",
                choices=["diva-vqa", "diva-vqa_large"],
                value="diva-vqa_large"
            )
            run_button = gr.Button("Run Prediction")
        with gr.Column(scale=1):
            output_box = gr.Textbox(label="Predicted Perceptual Quality Score (0–100)", lines=5)
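
    # Wire UI events: the dataset choice drives checkbox visibility and the test-set dropdown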
    train_dataset.change(
        fn=toggle_finetune_visibility,
        inputs=train_dataset,
        outputs=is_finetune_checkbox
    )
    is_finetune_checkbox.change(
        fn=update_test_dataset,
        inputs=[is_finetune_checkbox, train_dataset, test_dataset],
        outputs=[test_dataset, mode_message]
    )
    train_dataset.change(
        fn=update_test_dataset,
        inputs=[is_finetune_checkbox, train_dataset, test_dataset],
        outputs=[test_dataset, mode_message]
    )
    run_button.click(
        fn=run_diva_vqa,
        inputs=[video_input, is_finetune_checkbox, train_dataset, test_dataset, model_dropdown],
        outputs=output_box
    )

demo.launch()