Spaces: Running on Zero
jedick committed
Commit 2890297 · 1 Parent(s): b00d122

Disable Flash Attention (build error)

Files changed:
- main.py (+1 -1)
- requirements.txt (+1 -1)
main.py CHANGED
@@ -157,7 +157,7 @@ def GetChatModel(compute_mode, ckpt_dir=None):
     # Enable FlashAttention (requires pip install flash-attn)
     # https://huggingface.co/docs/transformers/en/attention_interface
     # https://huggingface.co/docs/transformers/perf_infer_gpu_one#flashattention
-    attn_implementation="flash_attention_2",
+    # attn_implementation="flash_attention_2",
     )
     # For Flash Attention version of Qwen3
     tokenizer.padding_side = "left"
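For context, the change comments out the attn_implementation argument passed to the Transformers model loader inside GetChatModel, so the model falls back to the default attention backend rather than requiring flash-attn to build. A minimal sketch of the same idea as a runtime guard instead of a hard-coded comment (the importlib probe, the function shape, and the "sdpa" fallback are assumptions for illustration, not code from this repo):

import importlib.util

from transformers import AutoModelForCausalLM, AutoTokenizer

def get_chat_model(ckpt_dir):
    # Request FlashAttention-2 only if the flash-attn package actually
    # installed; otherwise fall back to PyTorch's built-in SDPA kernels,
    # which need no extra wheel to build.
    attn = (
        "flash_attention_2"
        if importlib.util.find_spec("flash_attn") is not None
        else "sdpa"
    )
    model = AutoModelForCausalLM.from_pretrained(
        ckpt_dir,
        torch_dtype="auto",
        attn_implementation=attn,
    )
    tokenizer = AutoTokenizer.from_pretrained(ckpt_dir)
    # Left padding, per the original comment about the Flash Attention
    # path for Qwen3.
    tokenizer.padding_side = "left"
    return model, tokenizer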
requirements.txt CHANGED
@@ -5,7 +5,7 @@ chromadb==0.6.3
 # ValueError('Could not connect to tenant default_tenant. Are you sure it exists?')

 # FlashAttention
-flash-attn==2.8.2
+#flash-attn==2.8.2

 # Stated requirements:
 # Gemma 3: transformers>=4.50
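If flash-attn is pinned again later, a quick startup probe can confirm the wheel actually built in the Space before the model loader requests flash_attention_2. This snippet is illustrative only, not part of the repo:

# Probe for a working flash-attn build at startup.
try:
    import flash_attn  # noqa: F401
    print(f"flash-attn {flash_attn.__version__} available")
except ImportError:
    print("flash-attn not installed; using the default attention backend")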