from spaces import GPU
import gradio as gr
import torch
import os
import pandas as pd
from types import SimpleNamespace
from extractor.extract_rf_feats import VideoDataset_feature
from extractor.extract_slowfast_clip import SlowFast, extract_features_slowfast_pool
from extractor.extract_swint_clip import SwinT, extract_features_swint_pool
from model_regression import Mlp, preprocess_data
from demo_test import evaluate_video_quality, load_model, get_transform
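# ZeroGPU entry point: the @GPU decorator requests a GPU slice from Hugging Face
# Spaces for the duration of each call, so CUDA is only available inside this function.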
@GPU
def run_diva_vqa(video_path, is_finetune, train_data_name, test_data_name, network_name):
if not os.path.exists(video_path):
return "β No video uploaded or the uploaded file has expired. Please upload again."
print("CUDA available:", torch.cuda.is_available())
print("Current device:", torch.cuda.current_device())
config = SimpleNamespace(**{
'select_criteria': 'byrmse',
'is_finetune': is_finetune,
'save_path': 'model/',
'train_data_name': train_data_name,
'test_data_name': test_data_name,
'test_video_path': video_path,
'network_name': network_name,
'num_workers': 0,
'resize': 224,
'patch_size': 16,
'target_size': 224,
'model_name': 'Mlp',
})
print(config.test_video_path)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# test demo video
resize_transform = get_transform(config.resize)
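    # Number of patches per frame: (target_size / patch_size)^2, e.g. (224 / 16)^2 = 196.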
    top_n = int(config.target_size / config.patch_size) * int(config.target_size / config.patch_size)
data = {'vid': [os.path.splitext(os.path.basename(config.test_video_path))[0]],
'test_data_name': [config.test_data_name],
'test_video_path': [config.test_video_path]}
videos_dir = os.path.dirname(config.test_video_path)
test_df = pd.DataFrame(data)
print(test_df.T)
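    # Wrap the single uploaded video in the feature-extraction dataset;
    # batch_size=1 below since we score one video at a time.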
dataset = VideoDataset_feature(videos_dir, test_df, resize_transform, config.resize, config.test_data_name, config.patch_size, config.target_size, top_n)
data_loader = torch.utils.data.DataLoader(
dataset, batch_size=1, shuffle=False, num_workers=min(config.num_workers, os.cpu_count()), pin_memory=True
)
# load models to device
model_slowfast = SlowFast().to(device)
    if config.network_name == 'diva-vqa':
        model_swint = SwinT(global_pool='avg').to(device)  # 'swin_base_patch4_window7_224.ms_in22k_ft_in1k'
        input_features = 9984
    elif config.network_name == 'diva-vqa_large':
        model_swint = SwinT(model_name='swin_large_patch4_window7_224', global_pool='avg', pretrained=True).to(device)
        input_features = 11520
    else:
        raise ValueError(f"Unknown network_name: {config.network_name}")
model_mlp = load_model(config, device, input_features)
try:
score, runtime = evaluate_video_quality(config, data_loader, model_slowfast, model_swint, model_mlp, device)
return f"Predicted Quality Score: {score:.4f} (in {runtime:.2f}s)"
except Exception as e:
return f"β Error: {str(e)}"
finally:
if "gradio" in video_path and os.path.exists(video_path):
os.remove(video_path)
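
# Minimal local smoke test (a sketch: assumes the bundled KoNViD-1k demo clip
# exists at this relative path and that model weights are present under model/):
#   print(run_diva_vqa("ugc_original_videos/5636101558_540p.mp4",
#                      False, "lsvq_train", "konvid_1k", "diva-vqa_large"))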
def toggle_finetune_visibility(train_dataset):
"""
when using train dataset is lsvq_train
"""
return gr.update(visible=(train_dataset == "lsvq_train"))
def update_test_dataset(is_finetune, train_dataset, current_test_value):
if train_dataset != "lsvq_train":
msg = f"Intra-dataset experiment β test dataset is automatically set to **{train_dataset}**."
return gr.update(value=train_dataset, visible=False), gr.update(value=msg, visible=True)
else:
return gr.update(visible=is_finetune, value=current_test_value), gr.update(value="", visible=False)
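
# Gradio UI: video upload, dataset/model selectors, and the prediction output box.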
with gr.Blocks() as demo:
gr.Markdown("# πΉ DIVA-VQA Online Demo")
gr.Markdown(
"Upload a short video and get the predicted perceptual quality score using the DIVA-VQA model. "
"You can try our test video from KoNViD-1k: "
"<a href='https://huggingface.co/spaces/xinyiW915/DIVA-VQA/blob/main/ugc_original_videos/5636101558_540p.mp4' target='_blank'>demo video</a>. "
"<br><br>"
# "βοΈ This demo is currently running on <strong>Hugging Face CPU Basic</strong>: 2 vCPU β’ 16 GB RAM."
"βοΈ This demo is currently running on <strong>Hugging Face ZeroGPU Space</strong>: Dynamic resources (NVIDIA A100)."
)
with gr.Row():
with gr.Column(scale=2):
video_input = gr.Video(label="Upload a Video (e.g. mp4)")
train_dataset = gr.Dropdown(
label="Train Dataset",
choices=["cvd_2014", "konvid_1k", "live_vqc", "lsvq_train", "youtube_ugc"],
value="lsvq_train"
)
is_finetune_checkbox = gr.Checkbox(label="Use Finetuning?", value=False, visible=True)
test_dataset = gr.Dropdown(
label="Test Dataset for Finetuning",
choices=["cvd_2014", "konvid_1k", "live_vqc", "lsvq_test", "lsvq_test_1080p", "youtube_ugc"],
value="konvid_1k",
visible=False
)
mode_message = gr.Markdown("", visible=False)
model_dropdown = gr.Dropdown(
label="Our Models",
choices=["diva-vqa", "diva-vqa_large"],
value="diva-vqa_large"
)
run_button = gr.Button("Run Prediction")
with gr.Column(scale=1):
            output_box = gr.Textbox(label="Predicted Perceptual Quality Score (0–100)", lines=5)
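    # Event wiring: the finetune checkbox is only shown for lsvq_train, and the
    # test dataset mirrors the train dataset in the intra-dataset setting.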
train_dataset.change(
fn=toggle_finetune_visibility,
inputs=train_dataset,
outputs=is_finetune_checkbox
)
is_finetune_checkbox.change(
fn=update_test_dataset,
inputs=[is_finetune_checkbox, train_dataset, test_dataset],
outputs=[test_dataset, mode_message]
)
train_dataset.change(
fn=update_test_dataset,
inputs=[is_finetune_checkbox, train_dataset, test_dataset],
outputs=[test_dataset, mode_message]
)
run_button.click(
fn=run_diva_vqa,
inputs=[video_input, is_finetune_checkbox, train_dataset, test_dataset, model_dropdown],
outputs=output_box
)
demo.launch()