Upload 2 files
Browse files
- app.py +57 -69
- inference.py +18 -20
app.py
CHANGED
|
@@ -15,7 +15,7 @@ from inference import inference_patch
|
|
| 15 |
from convert import abc2xml, xml2, pdf2img
|
| 16 |
|
| 17 |
|
| 18 |
-
#
|
| 19 |
with open('prompts.txt', 'r') as f:
|
| 20 |
prompts = f.readlines()
|
| 21 |
|
|
@@ -25,12 +25,12 @@ for prompt in prompts:
|
|
| 25 |
parts = prompt.split('_')
|
| 26 |
valid_combinations.add((parts[0], parts[1], parts[2]))
|
| 27 |
|
| 28 |
-
#
|
| 29 |
periods = sorted({p for p, _, _ in valid_combinations})
|
| 30 |
composers = sorted({c for _, c, _ in valid_combinations})
|
| 31 |
instruments = sorted({i for _, _, i in valid_combinations})
|
| 32 |
|
| 33 |
-
#
|
| 34 |
def update_components(period, composer):
|
| 35 |
if not period:
|
| 36 |
return [
|
|
@@ -54,7 +54,7 @@ def update_components(period, composer):
|
|
| 54 |
)
|
| 55 |
]
|
| 56 |
|
| 57 |
-
#
|
| 58 |
class RealtimeStream(TextIOBase):
|
| 59 |
def __init__(self, queue):
|
| 60 |
self.queue = queue
|
|
@@ -81,7 +81,7 @@ def convert_files(abc_content, period, composer, instrumentation):
|
|
| 81 |
with open(filename_base_postinst + ".abc", "w", encoding="utf-8") as f:
|
| 82 |
f.write(postprocessed_inst_abc)
|
| 83 |
|
| 84 |
-
#
|
| 85 |
file_paths = {'abc': abc_filename}
|
| 86 |
try:
|
| 87 |
# abc2xml
|
|
@@ -115,15 +115,15 @@ def convert_files(abc_content, period, composer, instrumentation):
|
|
| 115 |
})
|
| 116 |
|
| 117 |
except Exception as e:
|
| 118 |
-
raise gr.Error(f"
|
| 119 |
|
| 120 |
return file_paths
|
| 121 |
|
| 122 |
|
| 123 |
-
#
|
| 124 |
def update_page(direction, data):
|
| 125 |
"""
|
| 126 |
-
data
|
| 127 |
"""
|
| 128 |
if not data:
|
| 129 |
return None, gr.update(interactive=False), gr.update(interactive=False), data
|
|
@@ -134,9 +134,9 @@ def update_page(direction, data):
|
|
| 134 |
data['current_page'] += 1
|
| 135 |
|
| 136 |
current_page_index = data['current_page']
|
| 137 |
-
#
|
| 138 |
new_image = f"{data['base']}_page_{current_page_index+1}.png"
|
| 139 |
-
#
|
| 140 |
prev_btn_state = gr.update(interactive=(current_page_index > 0))
|
| 141 |
next_btn_state = gr.update(interactive=(current_page_index < data['pages'] - 1))
|
| 142 |
|
|
@@ -146,13 +146,13 @@ def update_page(direction, data):
|
|
| 146 |
@spaces.GPU(duration=600)
|
| 147 |
def generate_music(period, composer, instrumentation):
|
| 148 |
"""
|
| 149 |
-
|
| 150 |
-
|
| 151 |
-
1) process_output (
|
| 152 |
-
2) final_output (
|
| 153 |
-
3) pdf_image (
|
| 154 |
-
4) audio_player (mp3
|
| 155 |
-
5) pdf_state (
|
| 156 |
"""
|
| 157 |
# Set a different random seed each time based on current timestamp
|
| 158 |
random_seed = int(time.time()) % 10000
|
|
@@ -175,7 +175,7 @@ def generate_music(period, composer, instrumentation):
|
|
| 175 |
pass
|
| 176 |
|
| 177 |
if (period, composer, instrumentation) not in valid_combinations:
|
| 178 |
-
#
|
| 179 |
raise gr.Error("Invalid prompt combination! Please re-select from the period options")
|
| 180 |
|
| 181 |
output_queue = queue.Queue()
|
|
@@ -186,7 +186,7 @@ def generate_music(period, composer, instrumentation):
|
|
| 186 |
|
| 187 |
def run_inference():
|
| 188 |
try:
|
| 189 |
-
#
|
| 190 |
result = inference_patch(period, composer, instrumentation)
|
| 191 |
result_container.append(result)
|
| 192 |
finally:
|
|
@@ -201,40 +201,40 @@ def generate_music(period, composer, instrumentation):
|
|
| 201 |
audio_file = None
|
| 202 |
pdf_state = None
|
| 203 |
|
| 204 |
-
#
|
| 205 |
while thread.is_alive():
|
| 206 |
try:
|
| 207 |
text = output_queue.get(timeout=0.1)
|
| 208 |
process_output += text
|
| 209 |
-
#
|
| 210 |
yield process_output, final_output_abc, pdf_image, audio_file, pdf_state, gr.update(value=None, visible=False)
|
| 211 |
except queue.Empty:
|
| 212 |
continue
|
| 213 |
|
| 214 |
-
#
|
| 215 |
while not output_queue.empty():
|
| 216 |
text = output_queue.get()
|
| 217 |
process_output += text
|
| 218 |
|
| 219 |
-
#
|
| 220 |
final_result = result_container[0] if result_container else ""
|
| 221 |
|
| 222 |
-
#
|
| 223 |
final_output_abc = "Converting files..."
|
| 224 |
yield process_output, final_output_abc, pdf_image, audio_file, pdf_state, gr.update(value=None, visible=False)
|
| 225 |
|
| 226 |
|
| 227 |
-
#
|
| 228 |
try:
|
| 229 |
file_paths = convert_files(final_result, period, composer, instrumentation)
|
| 230 |
final_output_abc = final_result
|
| 231 |
-
#
|
| 232 |
if file_paths['pages'] > 0:
|
| 233 |
pdf_image = f"{file_paths['base']}_page_1.png"
|
| 234 |
audio_file = file_paths['mp3']
|
| 235 |
-
pdf_state = file_paths #
|
| 236 |
|
| 237 |
-
#
|
| 238 |
download_list = []
|
| 239 |
if 'abc' in file_paths and os.path.exists(file_paths['abc']):
|
| 240 |
download_list.append(file_paths['abc'])
|
|
@@ -247,60 +247,60 @@ def generate_music(period, composer, instrumentation):
|
|
| 247 |
if 'mp3' in file_paths and os.path.exists(file_paths['mp3']):
|
| 248 |
download_list.append(file_paths['mp3'])
|
| 249 |
except Exception as e:
|
| 250 |
-
#
|
| 251 |
yield process_output, f"Error converting files: {str(e)}", None, None, None, gr.update(value=None, visible=False)
|
| 252 |
return
|
| 253 |
|
| 254 |
-
|
| 255 |
yield process_output, final_output_abc, pdf_image, audio_file, pdf_state, gr.update(value=download_list, visible=True)
|
| 256 |
|
| 257 |
|
| 258 |
def get_file(file_type, period, composer, instrumentation):
|
| 259 |
"""
|
| 260 |
-
|
| 261 |
"""
|
| 262 |
-
#
|
| 263 |
-
#
|
| 264 |
-
#
|
| 265 |
possible_files = [f for f in os.listdir('.') if f.endswith(f'.{file_type}')]
|
| 266 |
if not possible_files:
|
| 267 |
return None
|
| 268 |
-
#
|
| 269 |
possible_files.sort(key=os.path.getmtime)
|
| 270 |
return possible_files[-1]
|
| 271 |
|
| 272 |
|
| 273 |
css = """
|
| 274 |
-
/*
|
| 275 |
button[size="sm"] {
|
| 276 |
padding: 4px 8px !important;
|
| 277 |
margin: 2px !important;
|
| 278 |
min-width: 60px;
|
| 279 |
}
|
| 280 |
|
| 281 |
-
/* PDF
|
| 282 |
#pdf-preview {
|
| 283 |
-
border-radius: 8px; /*
|
| 284 |
-
box-shadow: 0 2px 8px rgba(0,0,0,0.1); /*
|
| 285 |
}
|
| 286 |
|
| 287 |
.page-btn {
|
| 288 |
-
padding: 12px !important; /*
|
| 289 |
-
margin: auto !important; /*
|
| 290 |
}
|
| 291 |
|
| 292 |
-
/*
|
| 293 |
.page-btn:hover {
|
| 294 |
background: #f0f0f0 !important;
|
| 295 |
transform: scale(1.05);
|
| 296 |
}
|
| 297 |
|
| 298 |
-
/*
|
| 299 |
.gr-row {
|
| 300 |
-
gap: 10px !important; /*
|
| 301 |
}
|
| 302 |
|
| 303 |
-
/*
|
| 304 |
.audio-panel {
|
| 305 |
margin-top: 15px !important;
|
| 306 |
max-width: 400px;
|
|
@@ -310,23 +310,13 @@ button[size="sm"] {
|
|
| 310 |
height: 200px !important;
|
| 311 |
}
|
| 312 |
|
| 313 |
-
/*
|
| 314 |
.save-as-row {
|
| 315 |
margin-top: 15px;
|
| 316 |
padding: 10px;
|
| 317 |
border-top: 1px solid #eee;
|
| 318 |
}
|
| 319 |
|
| 320 |
-
.save-as-label {
|
| 321 |
-
font-weight: bold;
|
| 322 |
-
margin-right: 10px;
|
| 323 |
-
align-self: center;
|
| 324 |
-
}
|
| 325 |
-
|
| 326 |
-
.save-buttons {
|
| 327 |
-
gap: 5px; /* Button spacing */
|
| 328 |
-
}
|
| 329 |
-
|
| 330 |
/* Download files styling */
|
| 331 |
.download-files {
|
| 332 |
margin-top: 15px;
|
|
@@ -339,12 +329,12 @@ button[size="sm"] {
|
|
| 339 |
with gr.Blocks(css=css) as demo:
|
| 340 |
gr.Markdown("## NotaGen")
|
| 341 |
|
| 342 |
-
#
|
| 343 |
pdf_state = gr.State()
|
| 344 |
|
| 345 |
with gr.Column():
|
| 346 |
with gr.Row():
|
| 347 |
-
#
|
| 348 |
with gr.Column():
|
| 349 |
with gr.Row():
|
| 350 |
period_dd = gr.Dropdown(
|
|
@@ -384,18 +374,16 @@ with gr.Blocks(css=css) as demo:
|
|
| 384 |
placeholder="Post-processed ABC scores will be shown here..."
|
| 385 |
)
|
| 386 |
|
| 387 |
-
#
|
| 388 |
audio_player = gr.Audio(
|
| 389 |
label="Audio Preview",
|
| 390 |
format="mp3",
|
| 391 |
interactive=False,
|
| 392 |
-
# container=False,
|
| 393 |
-
# elem_id="audio-preview"
|
| 394 |
)
|
| 395 |
|
| 396 |
-
#
|
| 397 |
with gr.Column():
|
| 398 |
-
#
|
| 399 |
pdf_image = gr.Image(
|
| 400 |
label="Sheet Music Preview",
|
| 401 |
show_label=False,
|
|
@@ -406,7 +394,7 @@ with gr.Blocks(css=css) as demo:
|
|
| 406 |
show_download_button=False
|
| 407 |
)
|
| 408 |
|
| 409 |
-
#
|
| 410 |
with gr.Row():
|
| 411 |
prev_btn = gr.Button(
|
| 412 |
"⬅️ Last Page",
|
|
@@ -430,7 +418,7 @@ with gr.Blocks(css=css) as demo:
|
|
| 430 |
type="filepath" # Make sure this is set to filepath
|
| 431 |
)
|
| 432 |
|
| 433 |
-
#
|
| 434 |
period_dd.change(
|
| 435 |
update_components,
|
| 436 |
inputs=[period_dd, composer_dd],
|
|
@@ -442,26 +430,26 @@ with gr.Blocks(css=css) as demo:
|
|
| 442 |
outputs=[composer_dd, instrument_dd]
|
| 443 |
)
|
| 444 |
|
| 445 |
-
#
|
| 446 |
generate_btn.click(
|
| 447 |
generate_music,
|
| 448 |
inputs=[period_dd, composer_dd, instrument_dd],
|
| 449 |
outputs=[process_output, final_output, pdf_image, audio_player, pdf_state, download_files]
|
| 450 |
)
|
| 451 |
|
| 452 |
-
#
|
| 453 |
prev_signal = gr.Textbox(value="prev", visible=False)
|
| 454 |
next_signal = gr.Textbox(value="next", visible=False)
|
| 455 |
|
| 456 |
prev_btn.click(
|
| 457 |
update_page,
|
| 458 |
-
inputs=[prev_signal, pdf_state], # ✅
|
| 459 |
outputs=[pdf_image, prev_btn, next_btn, pdf_state]
|
| 460 |
)
|
| 461 |
|
| 462 |
next_btn.click(
|
| 463 |
update_page,
|
| 464 |
-
inputs=[next_signal, pdf_state], # ✅
|
| 465 |
outputs=[pdf_image, prev_btn, next_btn, pdf_state]
|
| 466 |
)
|
| 467 |
|
|
|
|
| 15 |
from convert import abc2xml, xml2, pdf2img
|
| 16 |
|
| 17 |
|
| 18 |
+
# Read prompt combinations
|
| 19 |
with open('prompts.txt', 'r') as f:
|
| 20 |
prompts = f.readlines()
|
| 21 |
|
|
|
|
| 25 |
parts = prompt.split('_')
|
| 26 |
valid_combinations.add((parts[0], parts[1], parts[2]))
|
| 27 |
|
| 28 |
+
# Prepare dropdown options
|
| 29 |
periods = sorted({p for p, _, _ in valid_combinations})
|
| 30 |
composers = sorted({c for _, c, _ in valid_combinations})
|
| 31 |
instruments = sorted({i for _, _, i in valid_combinations})
|
| 32 |
|
| 33 |
+
# Dynamically update composer and instrument dropdown options
|
| 34 |
def update_components(period, composer):
|
| 35 |
if not period:
|
| 36 |
return [
|
|
|
|
| 54 |
)
|
| 55 |
]
|
| 56 |
|
| 57 |
+
# Custom realtime stream for outputting model inference process to frontend
|
| 58 |
class RealtimeStream(TextIOBase):
|
| 59 |
def __init__(self, queue):
|
| 60 |
self.queue = queue
|
|
|
|
| 81 |
with open(filename_base_postinst + ".abc", "w", encoding="utf-8") as f:
|
| 82 |
f.write(postprocessed_inst_abc)
|
| 83 |
|
| 84 |
+
# Convert files
|
| 85 |
file_paths = {'abc': abc_filename}
|
| 86 |
try:
|
| 87 |
# abc2xml
|
|
|
|
| 115 |
})
|
| 116 |
|
| 117 |
except Exception as e:
|
| 118 |
+
raise gr.Error(f"File processing failed: {str(e)}")
|
| 119 |
|
| 120 |
return file_paths
|
| 121 |
|
| 122 |
|
| 123 |
+
# Page navigation control function
|
| 124 |
def update_page(direction, data):
|
| 125 |
"""
|
| 126 |
+
data contains three key pieces of information: 'pages', 'current_page', and 'base'
|
| 127 |
"""
|
| 128 |
if not data:
|
| 129 |
return None, gr.update(interactive=False), gr.update(interactive=False), data
|
|
|
|
| 134 |
data['current_page'] += 1
|
| 135 |
|
| 136 |
current_page_index = data['current_page']
|
| 137 |
+
# Update image path
|
| 138 |
new_image = f"{data['base']}_page_{current_page_index+1}.png"
|
| 139 |
+
# When current_page==0, prev_btn is disabled; when current_page==pages-1, next_btn is disabled
|
| 140 |
prev_btn_state = gr.update(interactive=(current_page_index > 0))
|
| 141 |
next_btn_state = gr.update(interactive=(current_page_index < data['pages'] - 1))
|
| 142 |
|
|
|
|
| 146 |
@spaces.GPU(duration=600)
|
| 147 |
def generate_music(period, composer, instrumentation):
|
| 148 |
"""
|
| 149 |
+
Must ensure each yield returns the same number of values.
|
| 150 |
+
We're preparing to return 5 values, corresponding to:
|
| 151 |
+
1) process_output (intermediate inference information)
|
| 152 |
+
2) final_output (final ABC)
|
| 153 |
+
3) pdf_image (path to the PNG of the first page of the PDF)
|
| 154 |
+
4) audio_player (mp3 path)
|
| 155 |
+
5) pdf_state (state for page navigation)
|
| 156 |
"""
|
| 157 |
# Set a different random seed each time based on current timestamp
|
| 158 |
random_seed = int(time.time()) % 10000
|
|
|
|
| 175 |
pass
|
| 176 |
|
| 177 |
if (period, composer, instrumentation) not in valid_combinations:
|
| 178 |
+
# If the combination is invalid, raise an error
|
| 179 |
raise gr.Error("Invalid prompt combination! Please re-select from the period options")
|
| 180 |
|
| 181 |
output_queue = queue.Queue()
|
|
|
|
| 186 |
|
| 187 |
def run_inference():
|
| 188 |
try:
|
| 189 |
+
# Use downloaded model weights path for inference
|
| 190 |
result = inference_patch(period, composer, instrumentation)
|
| 191 |
result_container.append(result)
|
| 192 |
finally:
|
|
|
|
| 201 |
audio_file = None
|
| 202 |
pdf_state = None
|
| 203 |
|
| 204 |
+
# First continuously read intermediate output
|
| 205 |
while thread.is_alive():
|
| 206 |
try:
|
| 207 |
text = output_queue.get(timeout=0.1)
|
| 208 |
process_output += text
|
| 209 |
+
# No final ABC yet, files not yet converted
|
| 210 |
yield process_output, final_output_abc, pdf_image, audio_file, pdf_state, gr.update(value=None, visible=False)
|
| 211 |
except queue.Empty:
|
| 212 |
continue
|
| 213 |
|
| 214 |
+
# After thread ends, get all remaining items from the queue
|
| 215 |
while not output_queue.empty():
|
| 216 |
text = output_queue.get()
|
| 217 |
process_output += text
|
| 218 |
|
| 219 |
+
# Final inference result
|
| 220 |
final_result = result_container[0] if result_container else ""
|
| 221 |
|
| 222 |
+
# Display file conversion prompt
|
| 223 |
final_output_abc = "Converting files..."
|
| 224 |
yield process_output, final_output_abc, pdf_image, audio_file, pdf_state, gr.update(value=None, visible=False)
|
| 225 |
|
| 226 |
|
| 227 |
+
# Convert files
|
| 228 |
try:
|
| 229 |
file_paths = convert_files(final_result, period, composer, instrumentation)
|
| 230 |
final_output_abc = final_result
|
| 231 |
+
# Get the first image and mp3 file
|
| 232 |
if file_paths['pages'] > 0:
|
| 233 |
pdf_image = f"{file_paths['base']}_page_1.png"
|
| 234 |
audio_file = file_paths['mp3']
|
| 235 |
+
pdf_state = file_paths # Directly use the converted information dictionary as state
|
| 236 |
|
| 237 |
+
# Prepare download file list
|
| 238 |
download_list = []
|
| 239 |
if 'abc' in file_paths and os.path.exists(file_paths['abc']):
|
| 240 |
download_list.append(file_paths['abc'])
|
|
|
|
| 247 |
if 'mp3' in file_paths and os.path.exists(file_paths['mp3']):
|
| 248 |
download_list.append(file_paths['mp3'])
|
| 249 |
except Exception as e:
|
| 250 |
+
# If conversion fails, return error message to output box
|
| 251 |
yield process_output, f"Error converting files: {str(e)}", None, None, None, gr.update(value=None, visible=False)
|
| 252 |
return
|
| 253 |
|
| 254 |
+
# Final yield with all information - modify here to make component visible
|
| 255 |
yield process_output, final_output_abc, pdf_image, audio_file, pdf_state, gr.update(value=download_list, visible=True)
|
| 256 |
|
| 257 |
|
| 258 |
def get_file(file_type, period, composer, instrumentation):
|
| 259 |
"""
|
| 260 |
+
Returns the local file of specified type for Gradio download
|
| 261 |
"""
|
| 262 |
+
# Here you actually need to return based on specific file paths saved earlier, simplified for demo
|
| 263 |
+
# If matching by timestamp, you can store all converted files in a directory and get the latest
|
| 264 |
+
# This is just an example:
|
| 265 |
possible_files = [f for f in os.listdir('.') if f.endswith(f'.{file_type}')]
|
| 266 |
if not possible_files:
|
| 267 |
return None
|
| 268 |
+
# Simply return the latest
|
| 269 |
possible_files.sort(key=os.path.getmtime)
|
| 270 |
return possible_files[-1]
|
| 271 |
|
| 272 |
|
| 273 |
css = """
|
| 274 |
+
/* Compact button style */
|
| 275 |
button[size="sm"] {
|
| 276 |
padding: 4px 8px !important;
|
| 277 |
margin: 2px !important;
|
| 278 |
min-width: 60px;
|
| 279 |
}
|
| 280 |
|
| 281 |
+
/* PDF preview area */
|
| 282 |
#pdf-preview {
|
| 283 |
+
border-radius: 8px; /* Rounded corners */
|
| 284 |
+
box-shadow: 0 2px 8px rgba(0,0,0,0.1); /* Shadow */
|
| 285 |
}
|
| 286 |
|
| 287 |
.page-btn {
|
| 288 |
+
padding: 12px !important; /* Increase clickable area */
|
| 289 |
+
margin: auto !important; /* Vertical center */
|
| 290 |
}
|
| 291 |
|
| 292 |
+
/* Button hover effect */
|
| 293 |
.page-btn:hover {
|
| 294 |
background: #f0f0f0 !important;
|
| 295 |
transform: scale(1.05);
|
| 296 |
}
|
| 297 |
|
| 298 |
+
/* Layout adjustment */
|
| 299 |
.gr-row {
|
| 300 |
+
gap: 10px !important; /* Element spacing */
|
| 301 |
}
|
| 302 |
|
| 303 |
+
/* Audio player */
|
| 304 |
.audio-panel {
|
| 305 |
margin-top: 15px !important;
|
| 306 |
max-width: 400px;
|
|
|
|
| 310 |
height: 200px !important;
|
| 311 |
}
|
| 312 |
|
| 313 |
+
/* Save functionality area */
|
| 314 |
.save-as-row {
|
| 315 |
margin-top: 15px;
|
| 316 |
padding: 10px;
|
| 317 |
border-top: 1px solid #eee;
|
| 318 |
}
|
| 319 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 320 |
/* Download files styling */
|
| 321 |
.download-files {
|
| 322 |
margin-top: 15px;
|
|
|
|
| 329 |
with gr.Blocks(css=css) as demo:
|
| 330 |
gr.Markdown("## NotaGen")
|
| 331 |
|
| 332 |
+
# For storing PDF page count, current page and other information
|
| 333 |
pdf_state = gr.State()
|
| 334 |
|
| 335 |
with gr.Column():
|
| 336 |
with gr.Row():
|
| 337 |
+
# Left sidebar
|
| 338 |
with gr.Column():
|
| 339 |
with gr.Row():
|
| 340 |
period_dd = gr.Dropdown(
|
|
|
|
| 374 |
placeholder="Post-processed ABC scores will be shown here..."
|
| 375 |
)
|
| 376 |
|
| 377 |
+
# Audio playback
|
| 378 |
audio_player = gr.Audio(
|
| 379 |
label="Audio Preview",
|
| 380 |
format="mp3",
|
| 381 |
interactive=False,
|
|
|
|
|
|
|
| 382 |
)
|
| 383 |
|
| 384 |
+
# Right sidebar
|
| 385 |
with gr.Column():
|
| 386 |
+
# Image container
|
| 387 |
pdf_image = gr.Image(
|
| 388 |
label="Sheet Music Preview",
|
| 389 |
show_label=False,
|
|
|
|
| 394 |
show_download_button=False
|
| 395 |
)
|
| 396 |
|
| 397 |
+
# Page navigation buttons
|
| 398 |
with gr.Row():
|
| 399 |
prev_btn = gr.Button(
|
| 400 |
"⬅️ Last Page",
|
|
|
|
| 418 |
type="filepath" # Make sure this is set to filepath
|
| 419 |
)
|
| 420 |
|
| 421 |
+
# Dropdown linking
|
| 422 |
period_dd.change(
|
| 423 |
update_components,
|
| 424 |
inputs=[period_dd, composer_dd],
|
|
|
|
| 430 |
outputs=[composer_dd, instrument_dd]
|
| 431 |
)
|
| 432 |
|
| 433 |
+
# Click generate button, note outputs must match each yield in generate_music
|
| 434 |
generate_btn.click(
|
| 435 |
generate_music,
|
| 436 |
inputs=[period_dd, composer_dd, instrument_dd],
|
| 437 |
outputs=[process_output, final_output, pdf_image, audio_player, pdf_state, download_files]
|
| 438 |
)
|
| 439 |
|
| 440 |
+
# Page navigation
|
| 441 |
prev_signal = gr.Textbox(value="prev", visible=False)
|
| 442 |
next_signal = gr.Textbox(value="next", visible=False)
|
| 443 |
|
| 444 |
prev_btn.click(
|
| 445 |
update_page,
|
| 446 |
+
inputs=[prev_signal, pdf_state], # ✅ Use component
|
| 447 |
outputs=[pdf_image, prev_btn, next_btn, pdf_state]
|
| 448 |
)
|
| 449 |
|
| 450 |
next_btn.click(
|
| 451 |
update_page,
|
| 452 |
+
inputs=[next_signal, pdf_state], # ✅ Use component
|
| 453 |
outputs=[pdf_image, prev_btn, next_btn, pdf_state]
|
| 454 |
)
|
| 455 |
|
inference.py
CHANGED
|
@@ -69,30 +69,30 @@ def download_model_weights():
|
|
| 69 |
|
| 70 |
def prepare_model_for_kbit_training(model, use_gradient_checkpointing=True):
|
| 71 |
"""
|
| 72 |
-
|
| 73 |
-
|
| 74 |
-
1.
|
| 75 |
-
2.
|
| 76 |
-
3.
|
| 77 |
"""
|
| 78 |
-
#
|
| 79 |
model = model.to(dtype=torch.float16)
|
| 80 |
|
| 81 |
-
#
|
| 82 |
for param in model.parameters():
|
| 83 |
if param.dtype == torch.float32:
|
| 84 |
param.requires_grad = False
|
| 85 |
|
| 86 |
-
#
|
| 87 |
if use_gradient_checkpointing:
|
| 88 |
model.gradient_checkpointing_enable()
|
| 89 |
|
| 90 |
return model
|
| 91 |
|
| 92 |
-
|
| 93 |
model = prepare_model_for_kbit_training(
|
| 94 |
model,
|
| 95 |
-
use_gradient_checkpointing=False
|
| 96 |
)
|
| 97 |
|
| 98 |
print("Parameter Number: " + str(sum(p.numel() for p in model.parameters() if p.requires_grad)))
|
|
@@ -146,19 +146,19 @@ def complete_brackets(s):
|
|
| 146 |
stack = []
|
| 147 |
bracket_map = {'{': '}', '[': ']', '(': ')'}
|
| 148 |
|
| 149 |
-
#
|
| 150 |
for char in s:
|
| 151 |
if char in bracket_map:
|
| 152 |
stack.append(char)
|
| 153 |
elif char in bracket_map.values():
|
| 154 |
-
#
|
| 155 |
for key, value in bracket_map.items():
|
| 156 |
if value == char:
|
| 157 |
if stack and stack[-1] == key:
|
| 158 |
stack.pop()
|
| 159 |
-
break #
|
| 160 |
|
| 161 |
-
#
|
| 162 |
completion = ''.join(bracket_map[c] for c in reversed(stack))
|
| 163 |
return s + completion
|
| 164 |
|
|
@@ -333,26 +333,24 @@ def inference_patch(period, composer, instrumentation):
|
|
| 333 |
predicted_patch = torch.tensor([predicted_patch], device=device) # (1, 16)
|
| 334 |
input_patches = torch.cat([input_patches, predicted_patch], dim=1) # (1, 16 * patch_len)
|
| 335 |
|
| 336 |
-
if len(byte_list) > 102400:
|
| 337 |
failure_flag = True
|
| 338 |
break
|
| 339 |
-
if time.time() - start_time >
|
| 340 |
failure_flag = True
|
| 341 |
break
|
| 342 |
|
| 343 |
if input_patches.shape[1] >= PATCH_LENGTH * PATCH_SIZE and not end_flag:
|
| 344 |
-
# Do streaming slicing
|
| 345 |
print('Stream generating...')
|
| 346 |
|
| 347 |
metadata = ''.join(metadata_byte_list)
|
| 348 |
context_tunebody = ''.join(context_tunebody_byte_list)
|
| 349 |
|
| 350 |
if '\n' not in context_tunebody:
|
| 351 |
-
#
|
| 352 |
-
break
|
| 353 |
|
| 354 |
context_tunebody_liness = context_tunebody.split('\n')
|
| 355 |
-
if not context_tunebody.endswith('\n'):
|
| 356 |
context_tunebody_liness = [context_tunebody_liness[i] + '\n' for i in range(len(context_tunebody_liness) - 1)] + [context_tunebody_liness[-1]]
|
| 357 |
else:
|
| 358 |
context_tunebody_liness = [context_tunebody_liness[i] + '\n' for i in range(len(context_tunebody_liness))]
|
|
|
|
| 69 |
|
| 70 |
def prepare_model_for_kbit_training(model, use_gradient_checkpointing=True):
|
| 71 |
"""
|
| 72 |
+
Prepare model for k-bit training.
|
| 73 |
+
Features include:
|
| 74 |
+
1. Convert model to mixed precision (FP16).
|
| 75 |
+
2. Disable unnecessary gradient computations.
|
| 76 |
+
3. Enable gradient checkpointing (optional).
|
| 77 |
"""
|
| 78 |
+
# Convert model to mixed precision
|
| 79 |
model = model.to(dtype=torch.float16)
|
| 80 |
|
| 81 |
+
# Disable gradients for embedding layers
|
| 82 |
for param in model.parameters():
|
| 83 |
if param.dtype == torch.float32:
|
| 84 |
param.requires_grad = False
|
| 85 |
|
| 86 |
+
# Enable gradient checkpointing
|
| 87 |
if use_gradient_checkpointing:
|
| 88 |
model.gradient_checkpointing_enable()
|
| 89 |
|
| 90 |
return model
|
| 91 |
|
| 92 |
+
|
| 93 |
model = prepare_model_for_kbit_training(
|
| 94 |
model,
|
| 95 |
+
use_gradient_checkpointing=False
|
| 96 |
)
|
| 97 |
|
| 98 |
print("Parameter Number: " + str(sum(p.numel() for p in model.parameters() if p.requires_grad)))
|
|
|
|
| 146 |
stack = []
|
| 147 |
bracket_map = {'{': '}', '[': ']', '(': ')'}
|
| 148 |
|
| 149 |
+
# Iterate through each character, handle bracket matching
|
| 150 |
for char in s:
|
| 151 |
if char in bracket_map:
|
| 152 |
stack.append(char)
|
| 153 |
elif char in bracket_map.values():
|
| 154 |
+
# Find the corresponding left bracket
|
| 155 |
for key, value in bracket_map.items():
|
| 156 |
if value == char:
|
| 157 |
if stack and stack[-1] == key:
|
| 158 |
stack.pop()
|
| 159 |
+
break # Found matching right bracket, process next character
|
| 160 |
|
| 161 |
+
# Complete missing right brackets (in reverse order of remaining left brackets in stack)
|
| 162 |
completion = ''.join(bracket_map[c] for c in reversed(stack))
|
| 163 |
return s + completion
|
| 164 |
|
|
|
|
| 333 |
predicted_patch = torch.tensor([predicted_patch], device=device) # (1, 16)
|
| 334 |
input_patches = torch.cat([input_patches, predicted_patch], dim=1) # (1, 16 * patch_len)
|
| 335 |
|
| 336 |
+
if len(byte_list) > 102400:
|
| 337 |
failure_flag = True
|
| 338 |
break
|
| 339 |
+
if time.time() - start_time > 10 * 60:
|
| 340 |
failure_flag = True
|
| 341 |
break
|
| 342 |
|
| 343 |
if input_patches.shape[1] >= PATCH_LENGTH * PATCH_SIZE and not end_flag:
|
|
|
|
| 344 |
print('Stream generating...')
|
| 345 |
|
| 346 |
metadata = ''.join(metadata_byte_list)
|
| 347 |
context_tunebody = ''.join(context_tunebody_byte_list)
|
| 348 |
|
| 349 |
if '\n' not in context_tunebody:
|
| 350 |
+
break # Generated content is all metadata, abandon
|
|
|
|
| 351 |
|
| 352 |
context_tunebody_liness = context_tunebody.split('\n')
|
| 353 |
+
if not context_tunebody.endswith('\n'):
|
| 354 |
context_tunebody_liness = [context_tunebody_liness[i] + '\n' for i in range(len(context_tunebody_liness) - 1)] + [context_tunebody_liness[-1]]
|
| 355 |
else:
|
| 356 |
context_tunebody_liness = [context_tunebody_liness[i] + '\n' for i in range(len(context_tunebody_liness))]
|