Skip to content

Commit db22479

Browse files
committed
[dsv3] Turn off Flex for AP
1 parent 8769396 commit db22479

File tree

1 file changed

+6 -6 lines changed

1 file changed

+6
-6
lines changed

torchtitan/models/deepseek_v3/__init__.py

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -100,8 +100,8 @@
100 100   qk_rope_head_dim=64,
101 101   v_head_dim=128,
102 102   mscale=0.70,
103     - use_flex_attn=True,
104     - attn_mask_type="block_causal",
    103 + # use_flex_attn=True,
    104 + # attn_mask_type="block_causal",
105 105   ),
106 106   "236B": DeepSeekV3ModelArgs(
107 107   vocab_size=102400,
@@ -127,8 +127,8 @@
127 127   qk_nope_head_dim=128,
128 128   qk_rope_head_dim=64,
129 129   v_head_dim=128,
130     - use_flex_attn=True,
131     - attn_mask_type="block_causal",
    130 + # use_flex_attn=True,
    131 + # attn_mask_type="block_causal",
132 132   ),
133 133   "671B": DeepSeekV3ModelArgs(
134 134   vocab_size=129280,
@@ -154,8 +154,8 @@
154 154   qk_nope_head_dim=128,
155 155   qk_rope_head_dim=64,
156 156   v_head_dim=128,
157     - use_flex_attn=True,
158     - attn_mask_type="block_causal",
    157 + # use_flex_attn=True,
    158 + # attn_mask_type="block_causal",
159 159   ),
160 160   }
161 161

0 commit comments

Comments (0)