Safetensors
sailvl
custom_code
ctranslate2-4you committed on
Commit
e023f80
·
verified ·
1 Parent(s): 3c8b3be

Update modeling_qwen3.py

Browse files

fix for qwen3: make the LossKwargs import compatible across transformers versions (its location moved out of transformers.utils)

Files changed (1) hide show
  1. modeling_qwen3.py +11 -1
modeling_qwen3.py CHANGED
@@ -41,7 +41,6 @@ from transformers.modeling_rope_utils import ROPE_INIT_FUNCTIONS, dynamic_rope_u
41
  from transformers.modeling_utils import ALL_ATTENTION_FUNCTIONS, PreTrainedModel
42
  from transformers.processing_utils import Unpack
43
  from transformers.utils import (
44
- LossKwargs,
45
  add_code_sample_docstrings,
46
  add_start_docstrings,
47
  add_start_docstrings_to_model_forward,
@@ -49,6 +48,17 @@ from transformers.utils import (
49
  logging,
50
  replace_return_docstrings,
51
  )
 
 
 
 
 
 
 
 
 
 
 
52
  from transformers.utils.deprecation import deprecate_kwarg
53
  from .configuration_qwen3 import Qwen3Config
54
  # from .spec_sdpa_attention import spec_sdpa_attention_forward
 
41
  from transformers.modeling_utils import ALL_ATTENTION_FUNCTIONS, PreTrainedModel
42
  from transformers.processing_utils import Unpack
43
  from transformers.utils import (
 
44
  add_code_sample_docstrings,
45
  add_start_docstrings,
46
  add_start_docstrings_to_model_forward,
 
48
  logging,
49
  replace_return_docstrings,
50
  )
51
+
52
+ # LossKwargs compatibility - location varies by transformers version
53
+ try:
54
+ from transformers.utils import LossKwargs
55
+ except ImportError:
56
+ try:
57
+ from transformers.loss.loss_utils import LossKwargs
58
+ except ImportError:
59
+ from transformers.modeling_flash_attention_utils import FlashAttentionKwargs as _FAK
60
+ class LossKwargs(_FAK):
61
+ pass
62
  from transformers.utils.deprecation import deprecate_kwarg
63
  from .configuration_qwen3 import Qwen3Config
64
  # from .spec_sdpa_attention import spec_sdpa_attention_forward