prithivMLmods committed · Commit e5f1b3d · verified · 1 Parent(s): 14d580f

update app
Files changed (1):
  app.py +5 -0
app.py CHANGED

@@ -123,6 +123,7 @@ MODEL_ID_Q4B = "Qwen/Qwen3-VL-4B-Instruct"
 processor_q4b = AutoProcessor.from_pretrained(MODEL_ID_Q4B, trust_remote_code=True)
 model_q4b = Qwen3VLForConditionalGeneration.from_pretrained(
     MODEL_ID_Q4B,
+    attn_implementation="flash_attention_2",
     trust_remote_code=True,
     torch_dtype=torch.bfloat16
 ).to(device).eval()
@@ -132,6 +133,7 @@ MODEL_ID_Q8B = "Qwen/Qwen3-VL-8B-Instruct"
 processor_q8b = AutoProcessor.from_pretrained(MODEL_ID_Q8B, trust_remote_code=True)
 model_q8b = Qwen3VLForConditionalGeneration.from_pretrained(
     MODEL_ID_Q8B,
+    attn_implementation="flash_attention_2",
     trust_remote_code=True,
     torch_dtype=torch.bfloat16
 ).to(device).eval()
@@ -141,6 +143,7 @@ MODEL_ID_Q2B = "Qwen/Qwen3-VL-2B-Instruct"
 processor_q2b = AutoProcessor.from_pretrained(MODEL_ID_Q2B, trust_remote_code=True)
 model_q2b = Qwen3VLForConditionalGeneration.from_pretrained(
     MODEL_ID_Q2B,
+    attn_implementation="flash_attention_2",
     trust_remote_code=True,
     torch_dtype=torch.bfloat16
 ).to(device).eval()
@@ -150,6 +153,7 @@ MODEL_ID_M7B = "Qwen/Qwen2.5-VL-7B-Instruct"
 processor_m7b = AutoProcessor.from_pretrained(MODEL_ID_M7B, trust_remote_code=True)
 model_m7b = Qwen2_5_VLForConditionalGeneration.from_pretrained(
     MODEL_ID_M7B,
+    attn_implementation="flash_attention_2",
     trust_remote_code=True,
     torch_dtype=torch.float16
 ).to(device).eval()
@@ -159,6 +163,7 @@ MODEL_ID_X3B = "Qwen/Qwen2.5-VL-3B-Instruct"
 processor_x3b = AutoProcessor.from_pretrained(MODEL_ID_X3B, trust_remote_code=True)
 model_x3b = Qwen2_5_VLForConditionalGeneration.from_pretrained(
     MODEL_ID_X3B,
+    attn_implementation="flash_attention_2",
     trust_remote_code=True,
     torch_dtype=torch.float16
 ).to(device).eval()
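
Note (not part of this commit): the diff passes attn_implementation="flash_attention_2" unconditionally, which requires the separately installed flash-attn package and a CUDA device. Below is a minimal sketch of a more defensive variant for one of the five models, assuming flash-attn may be missing in the target environment; the fallback value "sdpa" and the availability check are assumptions, not code from app.py.

import importlib.util

import torch
from transformers import AutoProcessor, Qwen3VLForConditionalGeneration

MODEL_ID_Q4B = "Qwen/Qwen3-VL-4B-Instruct"
device = "cuda" if torch.cuda.is_available() else "cpu"

# Use FlashAttention-2 only when the flash-attn package is installed and a GPU
# is available; otherwise fall back to PyTorch's built-in SDPA attention.
attn_impl = (
    "flash_attention_2"
    if device == "cuda" and importlib.util.find_spec("flash_attn") is not None
    else "sdpa"
)

processor_q4b = AutoProcessor.from_pretrained(MODEL_ID_Q4B, trust_remote_code=True)
model_q4b = Qwen3VLForConditionalGeneration.from_pretrained(
    MODEL_ID_Q4B,
    attn_implementation=attn_impl,
    trust_remote_code=True,
    torch_dtype=torch.bfloat16,
).to(device).eval()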