from spaces import GPU
import gradio as gr
import torch
import os
import pandas as pd
from types import SimpleNamespace
from extractor.extract_rf_feats import VideoDataset_feature
from extractor.extract_slowfast_clip import SlowFast, extract_features_slowfast_pool
from extractor.extract_swint_clip import SwinT, extract_features_swint_pool
from model_regression import Mlp, preprocess_data
from demo_test import evaluate_video_quality, load_model, get_transform
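# ZeroGPU entry point: the @GPU decorator requests a GPU slice from Hugging Face
# Spaces for the duration of each call, so CUDA is only available inside this function.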
@GPU
def run_diva_vqa(video_path, is_finetune, train_data_name, test_data_name, network_name):
if not os.path.exists(video_path):
return "β No video uploaded or the uploaded file has expired. Please upload again."
print("CUDA available:", torch.cuda.is_available())
print("Current device:", torch.cuda.current_device())
config = SimpleNamespace(**{
'select_criteria': 'byrmse',
'is_finetune': is_finetune,
'save_path': 'model/',
'train_data_name': train_data_name,
'test_data_name': test_data_name,
'test_video_path': video_path,
'network_name': network_name,
'num_workers': 0,
'resize': 224,
'patch_size': 16,
'target_size': 224,
'model_name': 'Mlp',
})
print(config.test_video_path)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# test demo video
resize_transform = get_transform(config.resize)
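    # Number of patches per frame: (target_size / patch_size)^2, e.g. (224 / 16)^2 = 196.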
    top_n = int(config.target_size / config.patch_size) * int(config.target_size / config.patch_size)
data = {'vid': [os.path.splitext(os.path.basename(config.test_video_path))[0]],
'test_data_name': [config.test_data_name],
'test_video_path': [config.test_video_path]}
videos_dir = os.path.dirname(config.test_video_path)
test_df = pd.DataFrame(data)
print(test_df.T)
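    # Wrap the single uploaded video in the feature-extraction dataset;
    # batch_size=1 below since we score one video at a time.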
dataset = VideoDataset_feature(videos_dir, test_df, resize_transform, config.resize, config.test_data_name, config.patch_size, config.target_size, top_n)
data_loader = torch.utils.data.DataLoader(
dataset, batch_size=1, shuffle=False, num_workers=min(config.num_workers, os.cpu_count()), pin_memory=True
)
# load models to device
model_slowfast = SlowFast().to(device)
    if config.network_name == 'diva-vqa':
        model_swint = SwinT(global_pool='avg').to(device)  # 'swin_base_patch4_window7_224.ms_in22k_ft_in1k'
        input_features = 9984
    elif config.network_name == 'diva-vqa_large':
        model_swint = SwinT(model_name='swin_large_patch4_window7_224', global_pool='avg', pretrained=True).to(device)
        input_features = 11520
    else:
        raise ValueError(f"Unknown network_name: {config.network_name}")
model_mlp = load_model(config, device, input_features)
try:
score, runtime = evaluate_video_quality(config, data_loader, model_slowfast, model_swint, model_mlp, device)
return f"Predicted Quality Score: {score:.4f} (in {runtime:.2f}s)"
except Exception as e:
return f"β Error: {str(e)}"
finally:
if "gradio" in video_path and os.path.exists(video_path):
os.remove(video_path)
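
# Minimal local smoke test (a sketch: assumes the bundled KoNViD-1k demo clip
# exists at this relative path and that model weights are present under model/):
#   print(run_diva_vqa("ugc_original_videos/5636101558_540p.mp4",
#                      False, "lsvq_train", "konvid_1k", "diva-vqa_large"))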
def toggle_finetune_visibility(train_dataset):
"""
when using train dataset is lsvq_train
"""
return gr.update(visible=(train_dataset == "lsvq_train"))
def update_test_dataset(is_finetune, train_dataset, current_test_value):
if train_dataset != "lsvq_train":
msg = f"Intra-dataset experiment β test dataset is automatically set to **{train_dataset}**."
return gr.update(value=train_dataset, visible=False), gr.update(value=msg, visible=True)
else:
return gr.update(visible=is_finetune, value=current_test_value), gr.update(value="", visible=False)
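
# Gradio UI: video upload, dataset/model selectors, and the prediction output box.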
with gr.Blocks() as demo:
gr.Markdown("# πΉ DIVA-VQA Online Demo")
gr.Markdown(
"Upload a short video and get the predicted perceptual quality score using the DIVA-VQA model. "
"You can try our test video from KoNViD-1k: "
"<a href='https://huggingface.co/spaces/xinyiW915/DIVA-VQA/blob/main/ugc_original_videos/5636101558_540p.mp4' target='_blank'>demo video</a>. "
"<br><br>"
# "βοΈ This demo is currently running on <strong>Hugging Face CPU Basic</strong>: 2 vCPU β’ 16 GB RAM."
"βοΈ This demo is currently running on <strong>Hugging Face ZeroGPU Space</strong>: Dynamic resources (NVIDIA A100)."
)
with gr.Row():
with gr.Column(scale=2):
video_input = gr.Video(label="Upload a Video (e.g. mp4)")
train_dataset = gr.Dropdown(
label="Train Dataset",
choices=["cvd_2014", "konvid_1k", "live_vqc", "lsvq_train", "youtube_ugc"],
value="lsvq_train"
)
is_finetune_checkbox = gr.Checkbox(label="Use Finetuning?", value=False, visible=True)
test_dataset = gr.Dropdown(
label="Test Dataset for Finetuning",
choices=["cvd_2014", "konvid_1k", "live_vqc", "lsvq_test", "lsvq_test_1080p", "youtube_ugc"],
value="konvid_1k",
visible=False
)
mode_message = gr.Markdown("", visible=False)
model_dropdown = gr.Dropdown(
label="Our Models",
choices=["diva-vqa", "diva-vqa_large"],
value="diva-vqa_large"
)
run_button = gr.Button("Run Prediction")
with gr.Column(scale=1):
            output_box = gr.Textbox(label="Predicted Perceptual Quality Score (0–100)", lines=5)
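    # Event wiring: the finetune checkbox is only shown for lsvq_train, and the
    # test dataset mirrors the train dataset in the intra-dataset setting.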
train_dataset.change(
fn=toggle_finetune_visibility,
inputs=train_dataset,
outputs=is_finetune_checkbox
)
is_finetune_checkbox.change(
fn=update_test_dataset,
inputs=[is_finetune_checkbox, train_dataset, test_dataset],
outputs=[test_dataset, mode_message]
)
train_dataset.change(
fn=update_test_dataset,
inputs=[is_finetune_checkbox, train_dataset, test_dataset],
outputs=[test_dataset, mode_message]
)
run_button.click(
fn=run_diva_vqa,
inputs=[video_input, is_finetune_checkbox, train_dataset, test_dataset, model_dropdown],
outputs=output_box
)
demo.launch()