Update modeling_deepseek.py

#10
by mukulp - opened
Files changed (1)
  1. modeling_deepseek.py +3 -1
modeling_deepseek.py CHANGED
@@ -58,9 +58,11 @@ from .configuration_deepseek import DeepseekV2Config
58
  import torch.distributed as dist
59
  import numpy as np
60
 
61
- if is_flash_attn_2_available():
62
  from flash_attn import flash_attn_func, flash_attn_varlen_func
63
  from flash_attn.bert_padding import index_first_axis, pad_input, unpad_input # noqa
 
 
64
 
65
 
66
  # This makes `_prepare_4d_causal_attention_mask` a leaf function in the FX graph.
 
58
  import torch.distributed as dist
59
  import numpy as np
60
 
61
+ try:
62
  from flash_attn import flash_attn_func, flash_attn_varlen_func
63
  from flash_attn.bert_padding import index_first_axis, pad_input, unpad_input # noqa
64
+ except:
65
+ pass
66
 
67
 
68
  # This makes `_prepare_4d_causal_attention_mask` a leaf function in the FX graph.