jedick committed
Commit 2890297 · 1 Parent(s): b00d122

Disable Flash Attention (build error)

Files changed (2)
  1. main.py +1 -1
  2. requirements.txt +1 -1
main.py CHANGED
@@ -157,7 +157,7 @@ def GetChatModel(compute_mode, ckpt_dir=None):
         # Enable FlashAttention (requires pip install flash-attn)
         # https://huggingface.co/docs/transformers/en/attention_interface
         # https://huggingface.co/docs/transformers/perf_infer_gpu_one#flashattention
-        attn_implementation="flash_attention_2",
+        # attn_implementation="flash_attention_2",
     )
     # For Flash Attention version of Qwen3
     tokenizer.padding_side = "left"
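The commented-out argument lives inside the transformers model-loading call. For context, here is a minimal sketch of that pattern; the model id, dtype, and surrounding code are placeholders for illustration, not the repo's actual GetChatModel:

# Sketch only: model id and dtype are assumptions, not the repo's settings.
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

model_id = "Qwen/Qwen3-8B"  # placeholder checkpoint
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    torch_dtype=torch.bfloat16,
    # attn_implementation="flash_attention_2",  # disabled: flash-attn build error
)
# With the argument omitted, transformers picks its default attention backend
# (SDPA on recent versions), so no flash-attn build is required.
tokenizer.padding_side = "left"  # left padding for batched generation

Re-enabling Flash Attention later is a one-line change here plus restoring the flash-attn pin in requirements.txt.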
requirements.txt CHANGED
@@ -5,7 +5,7 @@ chromadb==0.6.3
 # ValueError('Could not connect to tenant default_tenant. Are you sure it exists?')
 
 # FlashAttention
-flash-attn==2.8.2
+#flash-attn==2.8.2
 
 # Stated requirements:
 # Gemma 3: transformers>=4.50
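Since the pin is commented out rather than deleted, one possible follow-up (a sketch, assuming the loading code can branch on the installed packages) is to probe for flash-attn at runtime and fall back to SDPA instead of hard-disabling the flag:

# Sketch of a runtime guard: pick Flash Attention only when the flash_attn
# package is actually importable in the current environment.
import importlib.util

if importlib.util.find_spec("flash_attn") is not None:
    attn_implementation = "flash_attention_2"
else:
    attn_implementation = "sdpa"  # PyTorch scaled-dot-product attention fallback

# The chosen value would then be passed as
# from_pretrained(..., attn_implementation=attn_implementation).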