Spaces:
Paused
Paused
disable flash_attention
Browse files - app_dialogue.py +6 -6
app_dialogue.py
CHANGED
|
@@ -2,11 +2,11 @@ import os
|
|
| 2 |
import subprocess
|
| 3 |
|
| 4 |
# Install flash attention
|
| 5 |
-
subprocess.run(
|
| 6 |
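-
"pip install flash-attn --no-build-isolation",
|
| 7 |
-
env={"FLASH_ATTENTION_SKIP_CUDA_BUILD": "TRUE"},
|
| 8 |
-
shell=True,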
|
| 9 |
-
)
|
| 10 |
|
| 11 |
|
| 12 |
import copy
|
|
@@ -31,7 +31,7 @@ MODELS = {
|
|
| 31 |
"xgen-mm-phi3-mini-instruct-interleave-r-v1.5": AutoModelForVision2Seq.from_pretrained(
|
| 32 |
"Salesforce/xgen-mm-phi3-mini-instruct-interleave-r-v1.5",
|
| 33 |
torch_dtype=torch.bfloat16,
|
| 34 |
-
_attn_implementation="flash_attention_2",
|
| 35 |
trust_remote_code=True
|
| 36 |
).to(DEVICE),
|
| 37 |
}
|
|
|
|
| 2 |
import subprocess
|
| 3 |
|
| 4 |
# Install flash attention
|
| 5 |
+
# subprocess.run(
|
| 6 |
+
# "pip install flash-attn --no-build-isolation",
|
| 7 |
+
# env={"FLASH_ATTENTION_SKIP_CUDA_BUILD": "TRUE"},
|
| 8 |
+
# shell=True,
|
| 9 |
+
# )
|
| 10 |
|
| 11 |
|
| 12 |
import copy
|
|
|
|
| 31 |
"xgen-mm-phi3-mini-instruct-interleave-r-v1.5": AutoModelForVision2Seq.from_pretrained(
|
| 32 |
"Salesforce/xgen-mm-phi3-mini-instruct-interleave-r-v1.5",
|
| 33 |
torch_dtype=torch.bfloat16,
|
| 34 |
+
# _attn_implementation="flash_attention_2",
|
| 35 |
trust_remote_code=True
|
| 36 |
).to(DEVICE),
|
| 37 |
}
|