Commit d391cd2

committed
weight matching complete
1 parent 6986253 commit d391cd2

File tree

2 files changed: +3 -8 lines changed

Diff for: keras_hub/src/models/qwen_moe/qwen_moe_decoder.py

+1 -7

@@ -63,12 +63,6 @@ def build(self, decoder_sequence_shape):
             )
         )

-        self._feedforward_layernorm = QwenLayerNorm(
-            epsilon=self.layer_norm_epsilon,
-            dtype=self.dtype_policy,
-            name="feedforward_layernorm",
-        )
-        self._feedforward_layernorm.build(decoder_sequence_shape)
         self.activation = keras.activations.get(self.activation_fn)
         self.built = True

@@ -201,7 +195,7 @@ def call(self, hidden_states):
             final_hidden_states, top_x[:, None], updated_values
         )

-        shared_expert_output = self.shared_expert(hidden_states)
+        shared_expert_output = self.shared_expert_dense(hidden_states)
         shared_expert_output = (
             ops.sigmoid(self.shared_expert_gate_dense(hidden_states))
             * shared_expert_output
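For readers skimming the second hunk: the call-site fix points the shared-expert path at shared_expert_dense, whose output is then scaled by a sigmoid gate computed from the same hidden states. The snippet below is a minimal standalone sketch of that gating pattern, not the KerasHub implementation; the plain Dense layers and toy hidden size are assumptions for illustration only.

from keras import layers, ops

# Standalone sketch of the sigmoid-gated shared-expert path.
# Assumption: plain Dense layers stand in for the real expert MLP and gate.
hidden_dim = 64
shared_expert_dense = layers.Dense(hidden_dim, name="shared_expert_dense")
shared_expert_gate_dense = layers.Dense(1, name="shared_expert_gate_dense")

def shared_expert_path(hidden_states, routed_expert_output):
    # Project through the shared expert, then scale by a per-token gate in
    # (0, 1) before adding the routed-expert result, mirroring the diff above.
    shared_expert_output = shared_expert_dense(hidden_states)
    shared_expert_output = (
        ops.sigmoid(shared_expert_gate_dense(hidden_states))
        * shared_expert_output
    )
    return routed_expert_output + shared_expert_output

# Example shapes: (batch, sequence, hidden).
x = ops.ones((2, 4, hidden_dim))
routed = ops.zeros((2, 4, hidden_dim))
out = shared_expert_path(x, routed)  # -> (2, 4, hidden_dim)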

Diff for: tools/checkpoint_conversion/convert_qwen_moe_checkpoints.py

+2 -1

@@ -15,7 +15,8 @@
 torch.set_default_device(device)

 from keras import ops  # noqa: E402
-from transformers import AutoModelForCausalLM, AutoTokenizer  # noqa: E402
+from transformers import AutoModelForCausalLM  # noqa: E402
+from transformers import AutoTokenizer  # noqa: E402

 import keras_hub  # noqa: E402
