Spaces:
Running
on
Zero
Running
on
Zero
update app
Browse files
app.py
CHANGED
|
@@ -123,6 +123,7 @@ MODEL_ID_Q4B = "Qwen/Qwen3-VL-4B-Instruct"
|
|
| 123 |
processor_q4b = AutoProcessor.from_pretrained(MODEL_ID_Q4B, trust_remote_code=True)
|
| 124 |
model_q4b = Qwen3VLForConditionalGeneration.from_pretrained(
|
| 125 |
MODEL_ID_Q4B,
|
|
|
|
| 126 |
trust_remote_code=True,
|
| 127 |
torch_dtype=torch.bfloat16
|
| 128 |
).to(device).eval()
|
|
@@ -132,6 +133,7 @@ MODEL_ID_Q8B = "Qwen/Qwen3-VL-8B-Instruct"
|
|
| 132 |
processor_q8b = AutoProcessor.from_pretrained(MODEL_ID_Q8B, trust_remote_code=True)
|
| 133 |
model_q8b = Qwen3VLForConditionalGeneration.from_pretrained(
|
| 134 |
MODEL_ID_Q8B,
|
|
|
|
| 135 |
trust_remote_code=True,
|
| 136 |
torch_dtype=torch.bfloat16
|
| 137 |
).to(device).eval()
|
|
@@ -141,6 +143,7 @@ MODEL_ID_Q2B = "Qwen/Qwen3-VL-2B-Instruct"
|
|
| 141 |
processor_q2b = AutoProcessor.from_pretrained(MODEL_ID_Q2B, trust_remote_code=True)
|
| 142 |
model_q2b = Qwen3VLForConditionalGeneration.from_pretrained(
|
| 143 |
MODEL_ID_Q2B,
|
|
|
|
| 144 |
trust_remote_code=True,
|
| 145 |
torch_dtype=torch.bfloat16
|
| 146 |
).to(device).eval()
|
|
@@ -150,6 +153,7 @@ MODEL_ID_M7B = "Qwen/Qwen2.5-VL-7B-Instruct"
|
|
| 150 |
processor_m7b = AutoProcessor.from_pretrained(MODEL_ID_M7B, trust_remote_code=True)
|
| 151 |
model_m7b = Qwen2_5_VLForConditionalGeneration.from_pretrained(
|
| 152 |
MODEL_ID_M7B,
|
|
|
|
| 153 |
trust_remote_code=True,
|
| 154 |
torch_dtype=torch.float16
|
| 155 |
).to(device).eval()
|
|
@@ -159,6 +163,7 @@ MODEL_ID_X3B = "Qwen/Qwen2.5-VL-3B-Instruct"
|
|
| 159 |
processor_x3b = AutoProcessor.from_pretrained(MODEL_ID_X3B, trust_remote_code=True)
|
| 160 |
model_x3b = Qwen2_5_VLForConditionalGeneration.from_pretrained(
|
| 161 |
MODEL_ID_X3B,
|
|
|
|
| 162 |
trust_remote_code=True,
|
| 163 |
torch_dtype=torch.float16
|
| 164 |
).to(device).eval()
|
|
|
|
| 123 |
processor_q4b = AutoProcessor.from_pretrained(MODEL_ID_Q4B, trust_remote_code=True)
|
| 124 |
model_q4b = Qwen3VLForConditionalGeneration.from_pretrained(
|
| 125 |
MODEL_ID_Q4B,
|
| 126 |
+    attn_implementation="flash_attention_2",
|
| 127 |
trust_remote_code=True,
|
| 128 |
torch_dtype=torch.bfloat16
|
| 129 |
).to(device).eval()
|
|
|
|
| 133 |
processor_q8b = AutoProcessor.from_pretrained(MODEL_ID_Q8B, trust_remote_code=True)
|
| 134 |
model_q8b = Qwen3VLForConditionalGeneration.from_pretrained(
|
| 135 |
MODEL_ID_Q8B,
|
| 136 |
+    attn_implementation="flash_attention_2",
|
| 137 |
trust_remote_code=True,
|
| 138 |
torch_dtype=torch.bfloat16
|
| 139 |
).to(device).eval()
|
|
|
|
| 143 |
processor_q2b = AutoProcessor.from_pretrained(MODEL_ID_Q2B, trust_remote_code=True)
|
| 144 |
model_q2b = Qwen3VLForConditionalGeneration.from_pretrained(
|
| 145 |
MODEL_ID_Q2B,
|
| 146 |
+    attn_implementation="flash_attention_2",
|
| 147 |
trust_remote_code=True,
|
| 148 |
torch_dtype=torch.bfloat16
|
| 149 |
).to(device).eval()
|
|
|
|
| 153 |
processor_m7b = AutoProcessor.from_pretrained(MODEL_ID_M7B, trust_remote_code=True)
|
| 154 |
model_m7b = Qwen2_5_VLForConditionalGeneration.from_pretrained(
|
| 155 |
MODEL_ID_M7B,
|
| 156 |
+    attn_implementation="flash_attention_2",
|
| 157 |
trust_remote_code=True,
|
| 158 |
torch_dtype=torch.float16
|
| 159 |
).to(device).eval()
|
|
|
|
| 163 |
processor_x3b = AutoProcessor.from_pretrained(MODEL_ID_X3B, trust_remote_code=True)
|
| 164 |
model_x3b = Qwen2_5_VLForConditionalGeneration.from_pretrained(
|
| 165 |
MODEL_ID_X3B,
|
| 166 |
+    attn_implementation="flash_attention_2",
|
| 167 |
trust_remote_code=True,
|
| 168 |
torch_dtype=torch.float16
|
| 169 |
).to(device).eval()
|