Spaces:
Running
Running
| import gradio as gr | |
| from datasets import load_dataset | |
| import nbformat | |
| from nbconvert import HTMLExporter | |
| from traitlets.config import Config | |
| import os | |
| import shutil | |
| import base64 | |
# Configuration for HTMLExporter
# The ExtractOutputPreprocessor is enabled so that the exporter's returned
# resources carry the cell outputs under resources['outputs']; embed_figures()
# below reads that entry and inlines the figures into the HTML.
config = Config()
config.HTMLExporter.preprocessors = ["nbconvert.preprocessors.ExtractOutputPreprocessor"]
html_exporter = HTMLExporter(config=config, template_name="classic")
# Full dataset; elsewhere in this file it is indexed first by the selected
# notebook score (e.g. ds["5"]) and then by row position.
ds = load_dataset("lvwerra/jupyter-notebooks-edu-v4")
# Same dataset restricted to rows whose "contains_outputs" field is truthy.
ds_out = ds.filter(lambda x: x["contains_outputs"])
# Scratch directory that reset_tmp_folder() wipes and recreates and into which
# parse_notebook() writes the currently selected notebook.
TMP_DIR = './tmp/'
# Stylesheet handed to gr.Blocks; the .notebook-container class is attached to
# the gr.HTML component that shows the rendered notebook, giving it a bounded,
# scrollable box.
css = """
.notebook-container {
max-height: 600px !important;
overflow-y: auto !important;
border: 1px solid #e1e5e9;
border-radius: 8px;
padding: 16px;
background-color: #ffffff;
}
"""
def reset_tmp_folder():
    """Leave TMP_DIR as a freshly created, empty directory."""
    already_present = os.path.exists(TMP_DIR)
    if already_present:
        # Drop the old directory together with everything stored in it.
        shutil.rmtree(TMP_DIR)
    os.makedirs(TMP_DIR)
def embed_figures(html_body, resources):
    """Inline extracted notebook outputs into the HTML as base64 data URIs.

    Args:
        html_body: HTML text in which outputs are referenced by name.
        resources: Mapping whose optional ``'outputs'`` entry maps each output
            name (as it appears in ``html_body``) to its raw bytes.

    Returns:
        ``html_body`` with every occurrence of each output name replaced by a
        ``data:<mime>;base64,...`` URI holding that output's content.
    """
    # Function-scope import: the file's import block does not provide it.
    import mimetypes

    # A missing or empty 'outputs' entry simply means there is nothing to embed.
    outputs = resources.get('outputs') or {}
    for name, payload in outputs.items():
        # Derive the MIME type from the name's extension so that non-PNG
        # outputs (e.g. .jpeg, .svg) are labelled correctly; keep the previous
        # PNG label as the fallback for unknown extensions.
        mime_type = mimetypes.guess_type(name)[0] or 'image/png'
        encoded = base64.b64encode(payload).decode('utf-8')
        html_body = html_body.replace(name, f'data:{mime_type};base64,{encoded}')
    return html_body
def update_max_index(score_option, output_option):
    """Build the component updates that re-range the index control.

    Args:
        score_option: Key of the dataset split currently selected.
        output_option: ``"All"`` measures the split in ``ds``; any other value
            measures it in the outputs-only ``ds_out``.

    Returns:
        A ``gr.Slider`` carrying the new maximum and a ``gr.Number`` carrying
        the same maximum with its value reset to 0.
    """
    dataset = ds if output_option == "All" else ds_out
    # Indices are zero-based, so the largest valid one is length - 1.
    last_index = len(dataset[score_option]) - 1
    slider_update = gr.Slider(maximum=last_index)
    number_update = gr.Number(value=0, maximum=last_index)
    return slider_update, number_update
def parse_notebook(score_options, output_options, index):
    """Render one dataset notebook to HTML and stage its source for download.

    Args:
        score_options: Key of the dataset split to read (the selected score).
        output_options: ``"All"`` reads from ``ds``; any other value reads from
            the outputs-only ``ds_out``.
        index: Position of the notebook inside the selected split.

    Returns:
        Tuple ``(notebook_body, out_path, notebook_response)``: the rendered
        HTML with figures embedded, the path of the notebook file written
        under ``TMP_DIR``, and the sample's ``"response"`` field.
    """
    # Start from an empty TMP_DIR so only the current notebook is kept on disk.
    reset_tmp_folder()

    dataset = ds if output_options == "All" else ds_out
    # UI widgets may deliver the position as a float; row lookup needs an int.
    sample = dataset[score_options][int(index)]

    notebook_string = sample["content"]
    notebook_response = sample["response"]
    notebook_id = sample["filename"]

    # Keep only the final path component: the name comes from dataset content,
    # and any directory part would either point at a folder that does not
    # exist or place the file outside TMP_DIR.
    out_path = os.path.join(TMP_DIR, os.path.basename(notebook_id))

    # Save the notebook string to a file. Encoding is pinned to UTF-8 so
    # non-ASCII notebook text does not depend on the platform's locale.
    with open(out_path, 'w', encoding='utf-8') as f:
        f.write(notebook_string)

    notebook_parsed = nbformat.reads(notebook_string, as_version=4)
    (notebook_body, resources) = html_exporter.from_notebook_node(notebook_parsed)
    # Replace references to extracted outputs with inline data URIs.
    notebook_body = embed_figures(notebook_body, resources)
    return notebook_body, out_path, notebook_response
# Build the viewer UI and wire its events, then start the app.
with gr.Blocks(css=css) as demo:
    gr.Markdown("# Kaggle Notebooks")

    # NOTE(review): the original indentation was lost in extraction; the two
    # filter widgets are assumed to be the Row's only children. Confirm
    # against the deployed layout.
    with gr.Row():
        score_options = gr.Dropdown(
            ["error", "0", "1", "2", "3", "4", "5"],
            value="5",
            label="Notebook score",
            info="Select the assigned notebook score.",
        )
        output_options = gr.Radio(
            ["Outputs only", "All"],
            value="Outputs only",
            label="Output filter",
            info="Many notebooks contain no outputs.",
        )

    # The initial range matches the default selection above
    # (score "5", outputs-only dataset).
    initial_max_index = len(ds_out["5"]) - 1
    index_slider = gr.Slider(minimum=0, maximum=initial_max_index, step=1, value=0, label="Index")

    file = gr.File()
    response = gr.Markdown("", label="LLM score justification")
    html = gr.HTML("", elem_classes=["notebook-container"])

    filter_inputs = [score_options, output_options]
    viewer_inputs = [score_options, output_options, index_slider]
    viewer_outputs = [html, file, response]

    # Changing either filter re-ranges the slider. update_max_index returns
    # two updates and both are routed to the same slider component.
    for filter_widget in (score_options, output_options):
        filter_widget.change(
            fn=update_max_index,
            inputs=filter_inputs,
            outputs=[index_slider, index_slider],
        )

    # Any change to the selection re-renders the notebook ...
    for trigger_widget in (score_options, output_options, index_slider):
        trigger_widget.change(fn=parse_notebook, inputs=viewer_inputs, outputs=viewer_outputs)

    # ... and the default selection is rendered once when the page loads.
    demo.load(fn=parse_notebook, inputs=viewer_inputs, outputs=viewer_outputs)

demo.launch()