
Commit 54c3465

[HOTFIX] Add Docstring for QwenCausalLM (#2279)
* add docstring
* update
1 parent 449144a commit 54c3465

3 files changed, +257 -5 lines


keras_hub/src/models/qwen/qwen_causal_lm.py

Lines changed: 124 additions & 0 deletions
@@ -17,6 +17,130 @@
     ]
 )
 class QwenCausalLM(CausalLM):
+    """An end-to-end Qwen model for causal language modeling.
+
+    A causal language model (LM) predicts the next token based on previous
+    tokens. This task setup can be used to train the model unsupervised on
+    plain text input, or to autoregressively generate plain text similar to
+    the data used for training. This task can be used for pre-training or
+    fine-tuning a Qwen model, simply by calling `fit()`.
+
+    This model has a `generate()` method, which generates text based on a
+    prompt. The generation strategy used is controlled by an additional
+    `sampler` argument on `compile()`. You can recompile the model with
+    different `keras_hub.samplers` objects to control the generation. By
+    default, `"greedy"` sampling will be used.
+
+    This model can optionally be configured with a `preprocessor` layer, in
+    which case it will automatically apply preprocessing to string inputs
+    during `fit()`, `predict()`, `evaluate()`, and `generate()`. This is done
+    by default when creating the model with `from_preset()`.
+
+    Args:
+        backbone: A `keras_hub.models.QwenBackbone` instance.
+        preprocessor: A `keras_hub.models.QwenCausalLMPreprocessor` or
+            `None`. If `None`, this model will not apply preprocessing, and
+            inputs should be preprocessed before calling the model.
+
+    Examples:
+
+    Use `generate()` to do text generation.
+    ```python
+    qwen_lm = keras_hub.models.QwenCausalLM.from_preset("qwen2.5_0.5b_en")
+    qwen_lm.generate("I want to say", max_length=30)
+
+    # Generate with batched prompts.
+    qwen_lm.generate(["This is a", "Where are you"], max_length=30)
+    ```
+
+    Compile the `generate()` function with a custom sampler.
+    ```python
+    qwen_lm = keras_hub.models.QwenCausalLM.from_preset("qwen2.5_0.5b_en")
+    qwen_lm.compile(sampler="top_k")
+    qwen_lm.generate("I want to say", max_length=30)
+
+    qwen_lm.compile(sampler=keras_hub.samplers.BeamSampler(num_beams=2))
+    qwen_lm.generate("I want to say", max_length=30)
+    ```
+
+    Use `generate()` without preprocessing.
+    ```python
+    prompt = {
+        # Token ids for "<bos> Qwen is".
+        "token_ids": np.array([[2, 12345, 678, 0, 0, 0, 0]] * 2),
+        # Use `"padding_mask"` to indicate values that should not be overridden.
+        "padding_mask": np.array([[1, 1, 1, 0, 0, 0, 0]] * 2),
+    }
+
+    qwen_lm = keras_hub.models.QwenCausalLM.from_preset(
+        "qwen2.5_0.5b_en",
+        preprocessor=None,
+    )
+    qwen_lm.generate(prompt)
+    ```
+
+    Call `fit()` on a single batch.
+    ```python
+    features = ["The quick brown fox jumped.", "I forgot my homework."]
+    qwen_lm = keras_hub.models.QwenCausalLM.from_preset("qwen2.5_0.5b_en")
+    qwen_lm.fit(x=features, batch_size=2)
+    ```
+
+    Call `fit()` with LoRA fine-tuning enabled.
+    ```python
+    features = ["The quick brown fox jumped.", "I forgot my homework."]
+    qwen_lm = keras_hub.models.QwenCausalLM.from_preset("qwen2.5_0.5b_en")
+    qwen_lm.backbone.enable_lora(rank=4)
+    qwen_lm.fit(x=features, batch_size=2)
+    ```
+
+    Call `fit()` without preprocessing.
+    ```python
+    x = {
+        # Token ids for "<bos> Qwen is a language model<eos>"
+        "token_ids": np.array([[2, 12345, 678, 543, 9876, 1, 0, 0]] * 2),
+        "padding_mask": np.array([[1, 1, 1, 1, 1, 1, 0, 0]] * 2),
+    }
+    y = np.array([[12345, 678, 543, 9876, 1, 0, 0, 0]] * 2)
+    sw = np.array([[1, 1, 1, 1, 1, 0, 0, 0]] * 2)
+
+    qwen_lm = keras_hub.models.QwenCausalLM.from_preset(
+        "qwen2.5_0.5b_en",
+        preprocessor=None,
+    )
+    qwen_lm.fit(x=x, y=y, sample_weight=sw, batch_size=2)
+    ```
+
+    Custom backbone and vocabulary.
+    ```python
+    tokenizer = keras_hub.models.QwenMoeTokenizer(
+        proto="qwen_moe_vocab.spm",
+    )
+    preprocessor = keras_hub.models.QwenMoeCausalLMPreprocessor(
+        tokenizer=tokenizer,
+        sequence_length=128,
+    )
+    backbone = keras_hub.models.QwenMoeBackbone(
+        vocabulary_size=151936,
+        num_layers=28,
+        num_query_heads=16,
+        num_key_value_heads=8,
+        hidden_dim=2048,
+        intermediate_dim=4096,
+        moe_intermediate_dim=128,
+        shared_expert_intermediate_dim=4096,
+        num_experts=60,
+        top_k=4,
+        max_sequence_length=4096,
+    )
+    qwen_lm = keras_hub.models.QwenMoeCausalLM(
+        backbone=backbone,
+        preprocessor=preprocessor,
+    )
+    features = ["The quick brown fox jumped.", "I forgot my homework."]
+    qwen_lm.fit(x=features, batch_size=2)
+    ```
+    """
+
     backbone_cls = QwenBackbone
     preprocessor_cls = QwenCausalLMPreprocessor

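The docstring above notes that a model created with `from_preset()` carries a preprocessor and therefore accepts raw strings in `fit()`, `predict()`, `evaluate()`, and `generate()`. As a minimal sketch of that workflow (not part of this commit; it assumes the `qwen2.5_0.5b_en` preset from the examples above and a standard `tf.data` input pipeline), fine-tuning on a small dataset of strings might look like this:

```python
import tensorflow as tf

import keras_hub

# Raw strings only; the attached preprocessor tokenizes and packs them.
features = ["The quick brown fox jumped.", "I forgot my homework."]
ds = tf.data.Dataset.from_tensor_slices(features).batch(2)

# Preset name taken from the docstring examples above (assumption that it
# is available in your environment).
qwen_lm = keras_hub.models.QwenCausalLM.from_preset("qwen2.5_0.5b_en")
qwen_lm.fit(ds, epochs=1)
qwen_lm.generate("I want to say", max_length=30)
```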
keras_hub/src/models/qwen/qwen_causal_lm_preprocessor.py

Lines changed: 66 additions & 0 deletions
@@ -11,6 +11,72 @@
     ]
 )
 class QwenCausalLMPreprocessor(CausalLMPreprocessor):
+    """Qwen Causal LM preprocessor.
+
+    This preprocessing layer is meant for use with
+    `keras_hub.models.QwenCausalLM`. By default, it will take in batches of
+    strings, and return outputs in a `(x, y, sample_weight)` format, where
+    the `y` label is the next token id in the `x` sequence.
+
+    For use with generation, the layer also exposes two methods
+    `generate_preprocess()` and `generate_postprocess()`. When this
+    preprocessor is attached to a `keras_hub.models.QwenCausalLM` instance,
+    these methods will be called implicitly in `generate()`. They can also be
+    called standalone (e.g. to precompute preprocessing inputs for generation
+    in a separate process).
+
+    Args:
+        tokenizer: A `keras_hub.models.QwenTokenizer` instance.
+        sequence_length: The length of the packed inputs.
+        add_start_token: If `True`, the preprocessor will prepend the
+            tokenizer start token to each input sequence. Default is `True`.
+        add_end_token: If `True`, the preprocessor will append the tokenizer
+            end token to each input sequence. Default is `False`.
+
+    Call arguments:
+        x: A string, `tf.Tensor` or list of python strings.
+        y: Label data. Should always be `None` as the layer generates labels.
+        sample_weight: Label weights. Should always be `None` as the layer
+            generates label weights.
+        sequence_length: Pass to override the configured `sequence_length` of
+            the layer.
+
+    Examples:
+    ```python
+    # Load the preprocessor from a preset.
+    preprocessor = keras_hub.models.QwenCausalLMPreprocessor.from_preset(
+        "qwen2.5_0.5b_en"
+    )
+
+    # Tokenize and pack a single sentence.
+    sentence = tf.constant("League of legends")
+    preprocessor(sentence)
+    # Same output.
+    preprocessor("League of legends")
+
+    # Tokenize a batch of sentences.
+    sentences = tf.constant(["Taco tuesday", "Fish taco please!"])
+    preprocessor(sentences)
+    # Same output.
+    preprocessor(["Taco tuesday", "Fish taco please!"])
+
+    # Map a dataset to preprocess a single sentence.
+    features = tf.constant(
+        [
+            "Avatar 2 is amazing!",
+            "Well, I am not sure.",
+        ]
+    )
+    labels = tf.constant([1, 0])
+    ds = tf.data.Dataset.from_tensor_slices((features, labels))
+    ds = ds.map(preprocessor, num_parallel_calls=tf.data.AUTOTUNE)
+
+    # Map a dataset to preprocess unlabeled sentences.
+    ds = tf.data.Dataset.from_tensor_slices(features)
+    ds = ds.map(preprocessor, num_parallel_calls=tf.data.AUTOTUNE)
+    ```
+    """
+
     backbone_cls = QwenBackbone
     tokenizer_cls = QwenTokenizer

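The preprocessor docstring mentions `generate_preprocess()` and `generate_postprocess()` but its examples never call them directly. A rough sketch of that standalone use (not part of the diff; the `sequence_length` argument and the output keys are assumptions based on the `CausalLMPreprocessor` base class) could look like this:

```python
import keras_hub

preprocessor = keras_hub.models.QwenCausalLMPreprocessor.from_preset(
    "qwen2.5_0.5b_en"
)

# Convert a raw prompt into the dense inputs that `generate()` consumes.
batch = preprocessor.generate_preprocess(
    ["I want to say"], sequence_length=30
)
# `batch` is assumed to hold "token_ids" and "padding_mask" arrays.

# Convert generated token ids back into strings, stripping padding and
# end tokens.
text = preprocessor.generate_postprocess(batch)
```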
keras_hub/src/models/qwen_moe/qwen_moe_causal_lm_preprocessor.py

Lines changed: 67 additions & 5 deletions
@@ -4,12 +4,74 @@
 from keras_hub.src.models.qwen_moe.qwen_moe_tokenizer import QwenMoeTokenizer
 
 
-@keras_hub_export(
-    [
-        "keras_hub.models.QwenMoeCausalLMPreprocessor",
-    ]
-)
+@keras_hub_export("keras_hub.models.QwenMoeCausalLMPreprocessor")
 class QwenMoeCausalLMPreprocessor(CausalLMPreprocessor):
+    """Qwen-Moe Causal LM preprocessor.
+
+    This preprocessing layer is meant for use with
+    `keras_hub.models.QwenMoeCausalLM`. By default, it will take in batches
+    of strings, and return outputs in a `(x, y, sample_weight)` format, where
+    the `y` label is the next token id in the `x` sequence.
+
+    For use with generation, the layer also exposes two methods
+    `generate_preprocess()` and `generate_postprocess()`. When this
+    preprocessor is attached to a `keras_hub.models.QwenMoeCausalLM`
+    instance, these methods will be called implicitly in `generate()`. They
+    can also be called standalone (e.g. to precompute preprocessing inputs
+    for generation in a separate process).
+
+    Args:
+        tokenizer: A `keras_hub.models.QwenMoeTokenizer` instance.
+        sequence_length: The length of the packed inputs.
+        add_start_token: If `True`, the preprocessor will prepend the
+            tokenizer start token to each input sequence. Default is `True`.
+        add_end_token: If `True`, the preprocessor will append the tokenizer
+            end token to each input sequence. Default is `False`.
+
+    Call arguments:
+        x: A string, `tf.Tensor` or list of python strings.
+        y: Label data. Should always be `None` as the layer generates labels.
+        sample_weight: Label weights. Should always be `None` as the layer
+            generates label weights.
+        sequence_length: Pass to override the configured `sequence_length` of
+            the layer.
+
+    Examples:
+    ```python
+    # Load the preprocessor from a preset.
+    preprocessor = keras_hub.models.QwenMoeCausalLMPreprocessor.from_preset(
+        "qwen2.5_0.5b_en"
+    )
+
+    # Tokenize and pack a single sentence.
+    sentence = tf.constant("League of legends")
+    preprocessor(sentence)
+    # Same output.
+    preprocessor("League of legends")
+
+    # Tokenize a batch of sentences.
+    sentences = tf.constant(["Taco tuesday", "Fish taco please!"])
+    preprocessor(sentences)
+    # Same output.
+    preprocessor(["Taco tuesday", "Fish taco please!"])
+
+    # Map a dataset to preprocess a single sentence.
+    features = tf.constant(
+        [
+            "Avatar 2 is amazing!",
+            "Well, I am not sure.",
+        ]
+    )
+    labels = tf.constant([1, 0])
+    ds = tf.data.Dataset.from_tensor_slices((features, labels))
+    ds = ds.map(preprocessor, num_parallel_calls=tf.data.AUTOTUNE)
+
+    # Map a dataset to preprocess unlabeled sentences.
+    ds = tf.data.Dataset.from_tensor_slices(features)
+    ds = ds.map(preprocessor, num_parallel_calls=tf.data.AUTOTUNE)
+    ```
+    """
+
     backbone_cls = QwenMoeBackbone
     tokenizer_cls = QwenMoeTokenizer

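The `Call arguments` section above says `sequence_length` can be passed per call to override the layer's configured value. Here is a small sketch of that override (not part of the diff; the `qwen1.5_moe_2.7b_en` preset name is an assumption for illustration, substitute a real Qwen-MoE preset):

```python
import keras_hub

# Hypothetical MoE preset name, used for illustration only.
preprocessor = keras_hub.models.QwenMoeCausalLMPreprocessor.from_preset(
    "qwen1.5_moe_2.7b_en"
)

# Packs to the layer's configured `sequence_length`.
x, y, sample_weight = preprocessor(["Taco tuesday"])

# Packs to 64 tokens for this call only.
x, y, sample_weight = preprocessor(["Taco tuesday"], sequence_length=64)
```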