Commit

add warning once
younesbelkada committed Oct 2, 2023
1 parent 2b8c7b4 commit 4a3387d
Showing 1 changed file with 11 additions and 2 deletions.
13 changes: 11 additions & 2 deletions src/transformers/models/mistral/modeling_mistral.py
@@ -358,10 +358,17 @@ def forward(

         query_states, key_states = apply_rotary_pos_emb(query_states, key_states, cos, sin, position_ids)

-        use_sliding_windows = _is_flash_using_sliding_windows and self.config.sliding_window is not None and kv_seq_len > self.config.sliding_window
+        use_sliding_windows = _is_flash_using_sliding_windows and hasattr(self.config, "sliding_window") is not None and kv_seq_len > self.config.sliding_window

+        if not _is_flash_using_sliding_windows:
+            logger.warning_once(
+                "The current flash attention version does not support sliding window attention, for a more memory efficient implementation"
+                " make sure to upgrade flash-attn library."
+            )
+
         if past_key_value is not None:
-            if use_sliding_windows and kv_seq_len > self.config.sliding_window:
+            # Activate slicing cache only if the config has a value `sliding_windows` attribute
+            if hasattr(self.config, "sliding_window") and kv_seq_len > self.config.sliding_window:
                 slicing_tokens = (kv_seq_len - self.config.sliding_window) + 1

                 past_key = past_key_value[0]
@@ -444,6 +451,8 @@ def _flash_attention_forward(
                 Attention dropout
             softmax_scale (`float`, *optional*):
                 The scaling of QK^T before applying softmax. Default to 1 / sqrt(head_dim)
+            use_sliding_windows (`bool`, *optional*):
+                Whether to activate sliding window attention.
         """
         # Contains at least one padding token in the sequence
         if padding_mask is not None:
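For context on the `_is_flash_using_sliding_windows` flag that now gates the warning above: its definition is outside the hunks shown, but a capability check of this kind is typically derived from the installed flash-attn package. The sketch below is an assumption about how such a flag might be computed (signature inspection of `flash_attn_func`), not the file's actual helper.

```python
import inspect

# Sketch only: detect whether the installed flash-attn build accepts a
# `window_size` argument (sliding window support was added in later
# flash-attn 2.x releases). The real `_is_flash_using_sliding_windows`
# helper in modeling_mistral.py may be computed differently.
try:
    from flash_attn import flash_attn_func

    _is_flash_using_sliding_windows = (
        "window_size" in inspect.signature(flash_attn_func).parameters
    )
except ImportError:
    _is_flash_using_sliding_windows = False
```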
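The `logger.warning_once` call added in the first hunk is what gives the commit its name: the warning about a missing sliding-window-capable flash-attn build should be emitted once, not on every forward pass. A minimal sketch of that deduplication pattern is below, using `functools.lru_cache` to memoize on the message; it illustrates the idea and is not the logging utility's actual implementation.

```python
import functools
import logging

logger = logging.getLogger(__name__)


@functools.lru_cache(maxsize=None)
def warning_once(message: str) -> None:
    # lru_cache memoizes on the message string, so repeated calls with the
    # same warning are no-ops after the first emission.
    logger.warning(message)


# Called once per forward pass, but only the first call logs anything.
warning_once("The current flash attention version does not support sliding window attention.")
warning_once("The current flash attention version does not support sliding window attention.")
```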

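The cache-slicing branch guarded by the new `hasattr(self.config, "sliding_window")` check (mostly collapsed in the first hunk) is what keeps the key/value cache bounded by the sliding window. A rough, self-contained sketch of that idea follows; the `(batch, num_heads, kv_seq_len, head_dim)` cache layout and the helper name are assumptions for illustration, not the file's exact code.

```python
import torch


def slice_kv_cache(past_key: torch.Tensor, past_value: torch.Tensor, sliding_window: int):
    # Sketch only: keep the most recent `sliding_window` cached positions so the
    # cache (and attention memory) stays bounded regardless of generation length.
    kv_seq_len = past_key.shape[2]
    if kv_seq_len <= sliding_window:
        return past_key, past_value
    # Number of oldest positions to drop; the diff's own bookkeeping uses
    # `slicing_tokens = (kv_seq_len - self.config.sliding_window) + 1`, and its
    # exact off-by-one handling may differ from this simplified version.
    slicing_tokens = kv_seq_len - sliding_window
    return (
        past_key[:, :, slicing_tokens:, :].contiguous(),
        past_value[:, :, slicing_tokens:, :].contiguous(),
    )


# Example: a cache of 10 positions trimmed to a window of 4.
k = torch.randn(1, 8, 10, 64)
v = torch.randn(1, 8, 10, 64)
k, v = slice_kv_cache(k, v, sliding_window=4)
print(k.shape)  # torch.Size([1, 8, 4, 64])
```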
1 comment on commit 4a3387d

@edmondja

What about dilated attention like Longformer? It was mentioned by the Mistral team.
