The Space's app script: it downloads the exported fastai learner from the Hub, turns uploaded audio into a mel spectrogram image, and classifies that image.
```python
import gradio
import torchaudio
from fastai.vision.all import *
from fastai.learner import load_learner
from torchvision.utils import save_image
from huggingface_hub import hf_hub_download

# Download the exported fastai learner from the Hub and load it
model = load_learner(
    hf_hub_download("kurianbenoy/music_genre_classification_baseline", "model.pkl")
)

EXAMPLES_PATH = Path("./examples")
labels = model.dls.vocab

with open("article.md") as f:
    article = f.read()

interface_options = {
    "title": "Music Genre Classification",
    "description": "A simple baseline model for classifying music genres with fast.ai on [Kaggle competition data](https://www.kaggle.com/competitions/kaggle-pog-series-s01e02/data)",
    "article": article,
    "interpretation": "default",
    "layout": "horizontal",
    # Audio clips from the validation set, stored under ./examples
    "examples": [
        str(EXAMPLES_PATH / f)
        for f in ["000003.ogg", "000032.ogg", "000038.ogg", "000050.ogg", "000103.ogg"]
    ],
    "allow_flagging": "never",
}

## Code from Dien Hoa Truong inference notebook: https://www.kaggle.com/code/dienhoa/inference-submission-music-genre
N_FFT = 2048
HOP_LEN = 1024


def create_spectrogram(filename):
    audio, sr = torchaudio.load(filename)
    specgram = torchaudio.transforms.MelSpectrogram(
        sample_rate=sr,
        n_fft=N_FFT,
        win_length=N_FFT,
        hop_length=HOP_LEN,
        center=True,
        pad_mode="reflect",
        power=2.0,
        norm="slaney",
        onesided=True,
        n_mels=224,
        mel_scale="htk",
    )(audio).mean(axis=0)  # average the channels down to mono
    specgram = torchaudio.transforms.AmplitudeToDB()(specgram)
    # Min-max scale the dB spectrogram into [0, 1] so it saves cleanly as an image
    specgram = specgram - specgram.min()
    specgram = specgram / specgram.max()
    return specgram
```
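`create_spectrogram` returns a single 2-D tensor scaled to [0, 1], with 224 mel bins on one axis (so the model sees a 224-pixel-tall "image") and one frame per hop on the other. A quick sanity check might look like this hypothetical snippet, where `clip.ogg` stands in for any local audio file:

```python
spec = create_spectrogram("clip.ogg")  # placeholder path to a local audio clip
print(spec.shape)  # torch.Size([224, n_frames]); n_frames grows with clip length
print(spec.min().item(), spec.max().item())  # 0.0 1.0 after min-max scaling
```

The rest of the script renders that spectrogram to a PNG, runs the learner on it, and wires everything into a Gradio interface: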
```python
def create_image(filename):
    specgram = create_spectrogram(filename)
    dest = Path("temp.png")
    save_image(specgram, dest)
    return dest


# Code from: https://huggingface.co/spaces/suvash/food-101-resnet50
def predict(img):
    img = PILImage.create(img)
    _pred, _pred_idx, probs = model.predict(img)
    # gradio doesn't support tensors, so convert each probability to float
    labels_probs = {labels[i]: float(probs[i]) for i, _ in enumerate(labels)}
    return labels_probs


def end2endpipeline(filename):
    # Render the uploaded audio to a spectrogram image, then classify it
    return predict(create_image(filename))


# Note: this targets the older gradio Interface API (gradio.inputs / gradio.outputs)
demo = gradio.Interface(
    fn=end2endpipeline,
    inputs=gradio.inputs.Audio(source="upload", type="filepath"),
    outputs=gradio.outputs.Label(num_top_classes=5),
    **interface_options,
)

launch_options = {
    "enable_queue": True,
    "share": False,
    # thanks Alex for pointing out this option to cache examples
    "cache_examples": True,
}

demo.launch(**launch_options)
```
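Before pushing to the Space, the whole pipeline can be smoke-tested locally. A hypothetical check, run before `demo.launch()` and assuming the example clips are present under `./examples`:

```python
# Hypothetical local check: classify one of the bundled validation clips
preds = end2endpipeline("examples/000003.ogg")
top3 = sorted(preds.items(), key=lambda kv: kv[1], reverse=True)[:3]
print(top3)  # the three most probable genres with their scores
```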