File tree: 1 file changed, +6 -6 lines changed
torchtitan/models/deepseek_v3: 1 file changed, +6 -6 lines changed
Original file line number | Diff line number | Diff line change
100100 qk_rope_head_dim = 64 ,
101101 v_head_dim = 128 ,
102102 mscale = 0.70 ,
103- use_flex_attn = True ,
104- attn_mask_type = "block_causal" ,
103+ # use_flex_attn=True,
104+ # attn_mask_type="block_causal",
105105 ),
106106 "236B" : DeepSeekV3ModelArgs (
107107 vocab_size = 102400 ,
127127 qk_nope_head_dim = 128 ,
128128 qk_rope_head_dim = 64 ,
129129 v_head_dim = 128 ,
130- use_flex_attn = True ,
131- attn_mask_type = "block_causal" ,
130+ # use_flex_attn=True,
131+ # attn_mask_type="block_causal",
132132 ),
133133 "671B" : DeepSeekV3ModelArgs (
134134 vocab_size = 129280 ,
154154 qk_nope_head_dim = 128 ,
155155 qk_rope_head_dim = 64 ,
156156 v_head_dim = 128 ,
157- use_flex_attn = True ,
158- attn_mask_type = "block_causal" ,
157+ # use_flex_attn=True,
158+ # attn_mask_type="block_causal",
159159 ),
160160}
161161
You can’t perform that action at this time.
0 commit comments