broadfield-dev committed
Commit e2ba8a5 · verified · 1 Parent(s): b13a27e

Update app.py

Files changed (1):
  1. app.py +19 -21
app.py CHANGED

@@ -1,7 +1,7 @@
 import gradio as gr
 import torch
 from PIL import Image
-# --- ADDED: Import BitsAndBytesConfig for CPU quantization ---
+# Import BitsAndBytesConfig for on-the-fly quantization
 from transformers import Qwen3VLForConditionalGeneration, AutoProcessor, BitsAndBytesConfig
 import os
 from playwright.sync_api import sync_playwright
@@ -9,10 +9,11 @@ import time
 import numpy as np
 
 # --- Configuration ---
-# --- CHANGED: Updated model options to reflect INT8 CPU quantization ---
+# --- ADDED: New INT4 option pointing to the same base model ---
 MODEL_OPTIONS = {
     "Standard (BF16)": "Qwen/Qwen3-VL-2B-Instruct",
-    "Quantized (INT8) - Faster on CPU": "Qwen/Qwen3-VL-2B-Instruct", # We use the same base model for quantization
+    "Quantized (INT8) - Faster on CPU": "Qwen/Qwen3-VL-2B-Instruct",
+    "Quantized (INT4) - Fastest on CPU": "Qwen/Qwen3-VL-2B-Instruct",
 }
 
 # --- DETAILED PROMPT TEMPLATE ---
@@ -63,25 +64,21 @@ def load_model(model_name):
 
     model, processor = None, None
     try:
-        # --- CHANGED: New logic for CPU-compatible quantization ---
-        if "Quantized" in model_name:
-            # Use bitsandbytes for 8-bit quantization on CPU
+        quantization_config = None
+        # --- ADDED: Logic to handle INT4, INT8, or no quantization ---
+        if "INT4" in model_name:
+            quantization_config = BitsAndBytesConfig(load_in_4bit=True)
+        elif "INT8" in model_name:
             quantization_config = BitsAndBytesConfig(load_in_8bit=True)
-            model = Qwen3VLForConditionalGeneration.from_pretrained(
-                model_id,
-                quantization_config=quantization_config,
-                device_map="auto",
-                trust_remote_code=True
-            )
-        else:
-            # Standard loading for the full-precision model
-            model = Qwen3VLForConditionalGeneration.from_pretrained(
-                model_id,
-                device_map="auto",
-                trust_remote_code=True
-            )
+
+        # A single, clean call to from_pretrained handles all cases
+        model = Qwen3VLForConditionalGeneration.from_pretrained(
+            model_id,
+            quantization_config=quantization_config, # Will be None if not quantizing
+            device_map="auto",
+            trust_remote_code=True
+        )
 
-        # The processor is the same for both versions
         processor = AutoProcessor.from_pretrained(model_id, trust_remote_code=True)
 
         yield f"Status: {model_name} model loaded successfully.", gr.update(interactive=True), model, processor
@@ -153,7 +150,8 @@ with gr.Blocks() as demo:
     with gr.Row():
         model_selector = gr.Radio(
            choices=list(MODEL_OPTIONS.keys()),
-            value="Quantized (INT8) - Faster on CPU", # Default to the faster option
+            # --- CHANGED: Default to the fastest option ---
+            value="Quantized (INT4) - Fastest on CPU",
             label="Select Model"
         )
         load_model_button = gr.Button("Load/Switch Model")
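
For anyone wanting to try the new loading path outside the app, the sketch below isolates the pattern this commit lands on: map the UI label to an optional BitsAndBytesConfig, then make a single from_pretrained call (passing quantization_config=None loads the model unquantized, which is the transformers default). The build_quant_config helper and the footprint printout are illustrative additions, not part of the committed app.py; they assume transformers, torch, and bitsandbytes are installed.

```python
# Minimal, self-contained sketch of the loading pattern from this commit.
# build_quant_config is a hypothetical helper added for illustration;
# the committed app.py inlines the same logic inside load_model().
from typing import Optional

from transformers import (
    AutoProcessor,
    BitsAndBytesConfig,
    Qwen3VLForConditionalGeneration,
)

MODEL_ID = "Qwen/Qwen3-VL-2B-Instruct"

def build_quant_config(model_name: str) -> Optional[BitsAndBytesConfig]:
    """Map a UI label to a bitsandbytes config; None means full precision."""
    if "INT4" in model_name:
        return BitsAndBytesConfig(load_in_4bit=True)
    if "INT8" in model_name:
        return BitsAndBytesConfig(load_in_8bit=True)
    return None

# One call covers all three options; quantization_config=None is the default.
model = Qwen3VLForConditionalGeneration.from_pretrained(
    MODEL_ID,
    quantization_config=build_quant_config("Quantized (INT4) - Fastest on CPU"),
    device_map="auto",
    trust_remote_code=True,
)
processor = AutoProcessor.from_pretrained(MODEL_ID, trust_remote_code=True)

# Quick sanity check that quantization actually shrank the weights.
print(f"Memory footprint: {model.get_memory_footprint() / 1e9:.2f} GB")
```

With load_in_4bit=True, weight storage should drop to roughly a quarter of the BF16 size (plus a small overhead for quantization constants). Note that bitsandbytes support on non-CUDA hardware varies by version, so the "Faster/Fastest on CPU" labels are worth verifying on the target machine.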