from spaces import GPU
import gradio as gr
import torch
import os
import pandas as pd
from types import SimpleNamespace

from extractor.extract_rf_feats import VideoDataset_feature
from extractor.extract_slowfast_clip import SlowFast, extract_features_slowfast_pool
from extractor.extract_swint_clip import SwinT, extract_features_swint_pool
from model_regression import Mlp, preprocess_data
from demo_test import evaluate_video_quality, load_model, get_transform
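

# Gradio demo for DIVA-VQA: extract SlowFast and Swin Transformer features from an
# uploaded video, then regress a perceptual quality score with the MLP head.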
@GPU  # ZeroGPU: a GPU is attached only while a decorated call runs (restores the otherwise-unused GPU import)
def run_diva_vqa(video_path, is_finetune, train_data_name, test_data_name, network_name):
    if not os.path.exists(video_path):
        return "❌ No video uploaded or the uploaded file has expired. Please upload again."
    print("CUDA available:", torch.cuda.is_available())
    if torch.cuda.is_available():  # current_device() raises when no GPU is present
        print("Current device:", torch.cuda.current_device())
    config = SimpleNamespace(**{
        'select_criteria': 'byrmse',
        'is_finetune': is_finetune,
        'save_path': 'model/',
        'train_data_name': train_data_name,
        'test_data_name': test_data_name,
        'test_video_path': video_path,
        'network_name': network_name,
        'num_workers': 0,
        'resize': 224,
        'patch_size': 16,
        'target_size': 224,
        'model_name': 'Mlp',
    })
    print(config.test_video_path)
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    # test demo video
    resize_transform = get_transform(config.resize)
    # number of patch_size x patch_size patches tiling a target_size x target_size frame (here 14 * 14 = 196)
    top_n = (config.target_size // config.patch_size) ** 2
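
    # Wrap the single uploaded video in a one-row DataFrame so it can reuse the batch dataset pipeline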
    data = {'vid': [os.path.splitext(os.path.basename(config.test_video_path))[0]],
            'test_data_name': [config.test_data_name],
            'test_video_path': [config.test_video_path]}
    videos_dir = os.path.dirname(config.test_video_path)
    test_df = pd.DataFrame(data)
    print(test_df.T)

    dataset = VideoDataset_feature(videos_dir, test_df, resize_transform, config.resize,
                                   config.test_data_name, config.patch_size, config.target_size, top_n)
    data_loader = torch.utils.data.DataLoader(
        dataset, batch_size=1, shuffle=False,
        num_workers=min(config.num_workers, os.cpu_count()), pin_memory=True
    )
    # load models to device
    model_slowfast = SlowFast().to(device)
    if config.network_name == 'diva-vqa':
        model_swint = SwinT(global_pool='avg').to(device)  # 'swin_base_patch4_window7_224.ms_in22k_ft_in1k'
        input_features = 9984
    elif config.network_name == 'diva-vqa_large':
        model_swint = SwinT(model_name='swin_large_patch4_window7_224', global_pool='avg', pretrained=True).to(device)
        input_features = 11520
    else:
        raise ValueError(f"Unsupported network_name: {config.network_name}")  # avoid a NameError below
    model_mlp = load_model(config, device, input_features)
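
    # Score the video; the Gradio temp upload is removed whether or not scoring succeeds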
    try:
        score, runtime = evaluate_video_quality(config, data_loader, model_slowfast, model_swint, model_mlp, device)
        return f"Predicted Quality Score: {score:.4f} (in {runtime:.2f}s)"
    except Exception as e:
        return f"❌ Error: {e}"
    finally:
        if "gradio" in video_path and os.path.exists(video_path):
            os.remove(video_path)


def toggle_finetune_visibility(train_dataset):
    """Show the finetune checkbox only when the train dataset is lsvq_train."""
    return gr.update(visible=(train_dataset == "lsvq_train"))


def update_test_dataset(is_finetune, train_dataset, current_test_value):
    if train_dataset != "lsvq_train":
        msg = f"Intra-dataset experiment → test dataset is automatically set to **{train_dataset}**."
        return gr.update(value=train_dataset, visible=False), gr.update(value=msg, visible=True)
    else:
        return gr.update(visible=is_finetune, value=current_test_value), gr.update(value="", visible=False)
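

# Build the Gradio UI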
with gr.Blocks() as demo:
    gr.Markdown("# 📹 DIVA-VQA Online Demo")
    gr.Markdown(
        "Upload a short video and get the predicted perceptual quality score using the DIVA-VQA model. "
        "You can try our test video from KoNViD-1k: "
        "<a href='https://huggingface.co/spaces/xinyiW915/DIVA-VQA/blob/main/ugc_original_videos/5636101558_540p.mp4' target='_blank'>demo video</a>. "
        "<br><br>"
        # "⚠️ This demo is currently running on <strong>Hugging Face CPU Basic</strong>: 2 vCPU • 16 GB RAM."
        "⚠️ This demo is currently running on <strong>Hugging Face ZeroGPU Space</strong>: Dynamic resources (NVIDIA A100)."
    )

    with gr.Row():
        with gr.Column(scale=2):
            video_input = gr.Video(label="Upload a Video (e.g. mp4)")
            train_dataset = gr.Dropdown(
                label="Train Dataset",
                choices=["cvd_2014", "konvid_1k", "live_vqc", "lsvq_train", "youtube_ugc"],
                value="lsvq_train"
            )
            is_finetune_checkbox = gr.Checkbox(label="Use Finetuning?", value=False, visible=True)
            test_dataset = gr.Dropdown(
                label="Test Dataset for Finetuning",
                choices=["cvd_2014", "konvid_1k", "live_vqc", "lsvq_test", "lsvq_test_1080p", "youtube_ugc"],
                value="konvid_1k",
                visible=False
            )
            mode_message = gr.Markdown("", visible=False)
            model_dropdown = gr.Dropdown(
                label="Our Models",
                choices=["diva-vqa", "diva-vqa_large"],
                value="diva-vqa_large"
            )
            run_button = gr.Button("Run Prediction")
        with gr.Column(scale=1):
            output_box = gr.Textbox(label="Predicted Perceptual Quality Score (0–100)", lines=5)
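
    # Wire UI events: the dataset choice drives checkbox visibility and the test-set dropdown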
    train_dataset.change(
        fn=toggle_finetune_visibility,
        inputs=train_dataset,
        outputs=is_finetune_checkbox
    )
    is_finetune_checkbox.change(
        fn=update_test_dataset,
        inputs=[is_finetune_checkbox, train_dataset, test_dataset],
        outputs=[test_dataset, mode_message]
    )
    train_dataset.change(
        fn=update_test_dataset,
        inputs=[is_finetune_checkbox, train_dataset, test_dataset],
        outputs=[test_dataset, mode_message]
    )
    run_button.click(
        fn=run_diva_vqa,
        inputs=[video_input, is_finetune_checkbox, train_dataset, test_dataset, model_dropdown],
        outputs=output_box
    )

demo.launch()