Spaces:
Running
on
Zero
Running
on
Zero
update app
Browse files
app.py
CHANGED
|
@@ -123,6 +123,7 @@ MODEL_ID_Q4B = "Qwen/Qwen3-VL-4B-Instruct"
|
|
| 123 |
processor_q4b = AutoProcessor.from_pretrained(MODEL_ID_Q4B, trust_remote_code=True)
|
| 124 |
model_q4b = Qwen3VLForConditionalGeneration.from_pretrained(
|
| 125 |
MODEL_ID_Q4B,
|
|
|
|
| 126 |
trust_remote_code=True,
|
| 127 |
torch_dtype=torch.bfloat16
|
| 128 |
).to(device).eval()
|
|
@@ -132,6 +133,7 @@ MODEL_ID_Q8B = "Qwen/Qwen3-VL-8B-Instruct"
|
|
| 132 |
processor_q8b = AutoProcessor.from_pretrained(MODEL_ID_Q8B, trust_remote_code=True)
|
| 133 |
model_q8b = Qwen3VLForConditionalGeneration.from_pretrained(
|
| 134 |
MODEL_ID_Q8B,
|
|
|
|
| 135 |
trust_remote_code=True,
|
| 136 |
torch_dtype=torch.bfloat16
|
| 137 |
).to(device).eval()
|
|
@@ -141,6 +143,7 @@ MODEL_ID_Q2B = "Qwen/Qwen3-VL-2B-Instruct"
|
|
| 141 |
processor_q2b = AutoProcessor.from_pretrained(MODEL_ID_Q2B, trust_remote_code=True)
|
| 142 |
model_q2b = Qwen3VLForConditionalGeneration.from_pretrained(
|
| 143 |
MODEL_ID_Q2B,
|
|
|
|
| 144 |
trust_remote_code=True,
|
| 145 |
torch_dtype=torch.bfloat16
|
| 146 |
).to(device).eval()
|
|
@@ -150,6 +153,7 @@ MODEL_ID_M7B = "Qwen/Qwen2.5-VL-7B-Instruct"
|
|
| 150 |
processor_m7b = AutoProcessor.from_pretrained(MODEL_ID_M7B, trust_remote_code=True)
|
| 151 |
model_m7b = Qwen2_5_VLForConditionalGeneration.from_pretrained(
|
| 152 |
MODEL_ID_M7B,
|
|
|
|
| 153 |
trust_remote_code=True,
|
| 154 |
torch_dtype=torch.float16
|
| 155 |
).to(device).eval()
|
|
@@ -159,6 +163,7 @@ MODEL_ID_X3B = "Qwen/Qwen2.5-VL-3B-Instruct"
|
|
| 159 |
processor_x3b = AutoProcessor.from_pretrained(MODEL_ID_X3B, trust_remote_code=True)
|
| 160 |
model_x3b = Qwen2_5_VLForConditionalGeneration.from_pretrained(
|
| 161 |
MODEL_ID_X3B,
|
|
|
|
| 162 |
trust_remote_code=True,
|
| 163 |
torch_dtype=torch.float16
|
| 164 |
).to(device).eval()
|
|
|
|
| 123 |
processor_q4b = AutoProcessor.from_pretrained(MODEL_ID_Q4B, trust_remote_code=True)
|
| 124 |
model_q4b = Qwen3VLForConditionalGeneration.from_pretrained(
|
| 125 |
MODEL_ID_Q4B,
|
| 126 |
+    attn_implementation="flash_attention_2",
|
| 127 |
trust_remote_code=True,
|
| 128 |
torch_dtype=torch.bfloat16
|
| 129 |
).to(device).eval()
|
|
|
|
| 133 |
processor_q8b = AutoProcessor.from_pretrained(MODEL_ID_Q8B, trust_remote_code=True)
|
| 134 |
model_q8b = Qwen3VLForConditionalGeneration.from_pretrained(
|
| 135 |
MODEL_ID_Q8B,
|
| 136 |
+    attn_implementation="flash_attention_2",
|
| 137 |
trust_remote_code=True,
|
| 138 |
torch_dtype=torch.bfloat16
|
| 139 |
).to(device).eval()
|
|
|
|
| 143 |
processor_q2b = AutoProcessor.from_pretrained(MODEL_ID_Q2B, trust_remote_code=True)
|
| 144 |
model_q2b = Qwen3VLForConditionalGeneration.from_pretrained(
|
| 145 |
MODEL_ID_Q2B,
|
| 146 |
+    attn_implementation="flash_attention_2",
|
| 147 |
trust_remote_code=True,
|
| 148 |
torch_dtype=torch.bfloat16
|
| 149 |
).to(device).eval()
|
|
|
|
| 153 |
processor_m7b = AutoProcessor.from_pretrained(MODEL_ID_M7B, trust_remote_code=True)
|
| 154 |
model_m7b = Qwen2_5_VLForConditionalGeneration.from_pretrained(
|
| 155 |
MODEL_ID_M7B,
|
| 156 |
+    attn_implementation="flash_attention_2",
|
| 157 |
trust_remote_code=True,
|
| 158 |
torch_dtype=torch.float16
|
| 159 |
).to(device).eval()
|
|
|
|
| 163 |
processor_x3b = AutoProcessor.from_pretrained(MODEL_ID_X3B, trust_remote_code=True)
|
| 164 |
model_x3b = Qwen2_5_VLForConditionalGeneration.from_pretrained(
|
| 165 |
MODEL_ID_X3B,
|
| 166 |
+    attn_implementation="flash_attention_2",
|
| 167 |
trust_remote_code=True,
|
| 168 |
torch_dtype=torch.float16
|
| 169 |
).to(device).eval()
|