[HOTFIX] Add Docstring for QwenCausalLM (#2279)

kanpuriyanawab · web-flow · commit 54c3465e9a59 · 2025-06-02T11:58:57.000-07:00
* add docstring

* update
diff --git a/keras_hub/src/models/qwen/qwen_causal_lm.py b/keras_hub/src/models/qwen/qwen_causal_lm.py
@@ -17,6 +17,130 @@
     ]
 )
 class QwenCausalLM(CausalLM):
+    """An end-to-end Qwen model for causal language modeling.
+
+    A causal language model (LM) predicts the next token based on previous
+    tokens. This task setup can be used to train the model unsupervised on plain
+    text input, or to autoregressively generate plain text similar to the data
+    used for training. This task can be used for pre-training or fine-tuning a
+    Qwen model, simply by calling `fit()`.
+
+    This model has a `generate()` method, which generates text based on a
+    prompt. The generation strategy used is controlled by an additional
+    `sampler` argument on `compile()`. You can recompile the model with
+    different `keras_hub.samplers` objects to control the generation.
+    By default, `"greedy"` sampling will be used.
+
+    This model can optionally be configured with a `preprocessor` layer, in
+    which case it will automatically apply preprocessing to string inputs during
+    `fit()`, `predict()`, `evaluate()`, and `generate()`. This is done by
+    default when creating the model with `from_preset()`.
+
+    Args:
+        backbone: A `keras_hub.models.QwenBackbone` instance.
+        preprocessor: A `keras_hub.models.QwenCausalLMPreprocessor` or
+            `None`. If `None`, this model will not apply preprocessing, and
+            inputs should be preprocessed before calling the model.
+
+    Examples:
+
+    Use `generate()` to do text generation.
+    ```python
+    qwen_lm = keras_hub.models.QwenCausalLM.from_preset("qwen2.5_0.5b_en")
+    qwen_lm.generate("I want to say", max_length=30)
+
+    # Generate with batched prompts.
+    qwen_lm.generate(["This is a", "Where are you"], max_length=30)
+    ```
+
+    Compile the `generate()` function with a custom sampler.
+    ```python
+    qwen_lm = keras_hub.models.QwenCausalLM.from_preset("qwen2.5_0.5b_en")
+    qwen_lm.compile(sampler="top_k")
+    qwen_lm.generate("I want to say", max_length=30)
+
+    qwen_lm.compile(sampler=keras_hub.samplers.BeamSampler(num_beams=2))
+    qwen_lm.generate("I want to say", max_length=30)
+    ```
+
+    Use `generate()` without preprocessing.
+    ```python
+    prompt = {
+        # Token ids for "<bos> Qwen is".
+        "token_ids": np.array([[2, 12345, 678, 0, 0, 0, 0]] * 2),
+        # Use `"padding_mask"` to indicate values that should not be overridden.
+        "padding_mask": np.array([[1, 1, 1, 0, 0, 0, 0]] * 2),
+    }
+
+    qwen_lm = keras_hub.models.QwenCausalLM.from_preset(
+        "qwen2.5_0.5b_en",
+        preprocessor=None,
+    )
+    qwen_lm.generate(prompt)
+    ```
+
+    Call `fit()` on a single batch.
+    ```python
+    features = ["The quick brown fox jumped.", "I forgot my homework."]
+    qwen_lm = keras_hub.models.QwenCausalLM.from_preset("qwen2.5_0.5b_en")
+    qwen_lm.fit(x=features, batch_size=2)
+    ```
+
+    Call `fit()` with LoRA fine-tuning enabled.
+    ```python
+    features = ["The quick brown fox jumped.", "I forgot my homework."]
+    qwen_lm = keras_hub.models.QwenCausalLM.from_preset("qwen2.5_0.5b_en")
+    qwen_lm.backbone.enable_lora(rank=4)
+    qwen_lm.fit(x=features, batch_size=2)
+    ```
+
+    Call `fit()` without preprocessing.
+    ```python
+    x = {
+        # Token ids for "<bos> Qwen is a language model<eos>"
+        "token_ids": np.array([[2, 12345, 678, 543, 9876, 1, 0, 0]] * 2),
+        "padding_mask": np.array([[1, 1, 1, 1, 1, 1, 0, 0]] * 2),
+    }
+    y = np.array([[12345, 678, 543, 9876, 1, 0, 0, 0]] * 2)
+    sw = np.array([[1, 1, 1, 1, 1, 0, 0, 0]] * 2)
+
+    qwen_lm = keras_hub.models.QwenCausalLM.from_preset(
+        "qwen2.5_0.5b_en",
+        preprocessor=None,
+    )
+    qwen_lm.fit(x=x, y=y, sample_weight=sw, batch_size=2)
+    ```
+
+    Custom backbone and vocabulary.
+    ```python
+    tokenizer = keras_hub.models.QwenTokenizer(
+        proto="qwen_vocab.spm",
+    )
+    preprocessor = keras_hub.models.QwenCausalLMPreprocessor(
+        tokenizer=tokenizer,
+        sequence_length=128,
+    )
+    backbone = keras_hub.models.QwenBackbone(
+        vocabulary_size=151936,
+        num_layers=28,
+        num_query_heads=16,
+        num_key_value_heads=8,
+        hidden_dim=2048,
+        intermediate_dim=4096,
+        moe_intermediate_dim=128,
+        shared_expert_intermediate_dim=4096,
+        num_experts=60,
+        top_k=4,
+        max_sequence_length=4096,
+    )
+    qwen_lm = keras_hub.models.QwenCausalLM(
+        backbone=backbone,
+        preprocessor=preprocessor,
+    )
+    qwen_lm.fit(x=features, batch_size=2)
+    ```
+    """
+
     backbone_cls = QwenBackbone
     preprocessor_cls = QwenCausalLMPreprocessor
 
diff --git a/keras_hub/src/models/qwen/qwen_causal_lm_preprocessor.py b/keras_hub/src/models/qwen/qwen_causal_lm_preprocessor.py
@@ -11,6 +11,72 @@
     ]
 )
 class QwenCausalLMPreprocessor(CausalLMPreprocessor):
+    """Qwen Causal LM preprocessor.
+
+    This preprocessing layer is meant for use with
+    `keras_hub.models.QwenCausalLM`. By default, it will take in batches of
+    strings, and return outputs in a `(x, y, sample_weight)` format, where the
+    `y` label is the next token id in the `x` sequence.
+
+    For use with generation, the layer also exposes two methods
+    `generate_preprocess()` and `generate_postprocess()`. When this preprocessor
+    is attached to a `keras_hub.models.QwenCausalLM` instance, these methods
+    will be called implicitly in `generate()`. They can also be called
+    standalone (e.g. to precompute preprocessing inputs for generation in a
+    separate process).
+
+    Args:
+        tokenizer: A `keras_hub.models.QwenTokenizer` instance.
+        sequence_length: The length of the packed inputs.
+        add_start_token: If `True`, the preprocessor will prepend the tokenizer
+            start token to each input sequence. Default is `True`.
+        add_end_token: If `True`, the preprocessor will append the tokenizer
+            end token to each input sequence. Default is `False`.
+
+    Call arguments:
+        x: A string, `tf.Tensor` or list of python strings.
+        y: Label data. Should always be `None` as the layer generates labels.
+        sample_weight: Label weights. Should always be `None` as the layer
+            generates label weights.
+        sequence_length: Pass to override the configured `sequence_length` of
+            the layer.
+
+    Examples:
+    ```python
+    # Load the preprocessor from a preset.
+    preprocessor = keras_hub.models.QwenCausalLMPreprocessor.from_preset(
+        "qwen2.5_0.5b_en"
+    )
+
+    # Tokenize and pack a single sentence.
+    sentence = tf.constant("League of legends")
+    preprocessor(sentence)
+    # Same output.
+    preprocessor("League of legends")
+
+    # Tokenize a batch of sentences.
+    sentences = tf.constant(["Taco tuesday", "Fish taco please!"])
+    preprocessor(sentences)
+    # Same output.
+    preprocessor(["Taco tuesday", "Fish taco please!"])
+
+    # Map a dataset to preprocess a single sentence.
+    features = tf.constant(
+        [
+            "Avatar 2 is amazing!",
+            "Well, I am not sure.",
+        ]
+    )
+    labels = tf.constant([1, 0])
+    ds = tf.data.Dataset.from_tensor_slices((features, labels))
+    ds = ds.map(preprocessor, num_parallel_calls=tf.data.AUTOTUNE)
+
+    # Map a dataset to preprocess unlabeled sentences.
+    ds = tf.data.Dataset.from_tensor_slices(features)
+    ds = ds.map(preprocessor, num_parallel_calls=tf.data.AUTOTUNE)
+    ```
+    """
+
     backbone_cls = QwenBackbone
     tokenizer_cls = QwenTokenizer
 
diff --git a/keras_hub/src/models/qwen_moe/qwen_moe_causal_lm_preprocessor.py b/keras_hub/src/models/qwen_moe/qwen_moe_causal_lm_preprocessor.py
@@ -4,12 +4,74 @@
 from keras_hub.src.models.qwen_moe.qwen_moe_tokenizer import QwenMoeTokenizer
 
 
-@keras_hub_export(
-    [
-        "keras_hub.models.QwenMoeCausalLMPreprocessor",
-    ]
-)
+@keras_hub_export("keras_hub.models.QwenMoeCausalLMPreprocessor")
 class QwenMoeCausalLMPreprocessor(CausalLMPreprocessor):
+    """Qwen-Moe Causal LM preprocessor.
+
+    This preprocessing layer is meant for use with
+    `keras_hub.models.QwenMoeCausalLM`. By default, it will take in batches of
+    strings, and return outputs in a `(x, y, sample_weight)` format, where the
+    `y` label is the next token id in the `x` sequence.
+
+    For use with generation, the layer also exposes two methods
+    `generate_preprocess()` and `generate_postprocess()`. When this preprocessor
+    is attached to a `keras_hub.models.QwenMoeCausalLM` instance, these methods
+    will be called implicitly in `generate()`. They can also be called
+    standalone (e.g. to precompute preprocessing inputs for generation in a
+    separate process).
+
+    Args:
+        tokenizer: A `keras_hub.models.QwenMoeTokenizer` instance.
+        sequence_length: The length of the packed inputs.
+        add_start_token: If `True`, the preprocessor will prepend the tokenizer
+            start token to each input sequence. Default is `True`.
+        add_end_token: If `True`, the preprocessor will append the tokenizer
+            end token to each input sequence. Default is `False`.
+
+    Call arguments:
+        x: A string, `tf.Tensor` or list of python strings.
+        y: Label data. Should always be `None` as the layer generates labels.
+        sample_weight: Label weights. Should always be `None` as the layer
+            generates label weights.
+        sequence_length: Pass to override the configured `sequence_length` of
+            the layer.
+
+    Examples:
+    ```python
+    # Load the preprocessor from a preset.
+    preprocessor = keras_hub.models.QwenMoeCausalLMPreprocessor.from_preset(
+        "qwen1.5_moe_2.7b_en"
+    )
+
+    # Tokenize and pack a single sentence.
+    sentence = tf.constant("League of legends")
+    preprocessor(sentence)
+    # Same output.
+    preprocessor("League of legends")
+
+    # Tokenize a batch of sentences.
+    sentences = tf.constant(["Taco tuesday", "Fish taco please!"])
+    preprocessor(sentences)
+    # Same output.
+    preprocessor(["Taco tuesday", "Fish taco please!"])
+
+    # Map a dataset to preprocess a single sentence.
+    features = tf.constant(
+        [
+            "Avatar 2 is amazing!",
+            "Well, I am not sure.",
+        ]
+    )
+    labels = tf.constant([1, 0])
+    ds = tf.data.Dataset.from_tensor_slices((features, labels))
+    ds = ds.map(preprocessor, num_parallel_calls=tf.data.AUTOTUNE)
+
+    # Map a dataset to preprocess unlabeled sentences.
+    ds = tf.data.Dataset.from_tensor_slices(features)
+    ds = ds.map(preprocessor, num_parallel_calls=tf.data.AUTOTUNE)
+    ```
+    """
+
     backbone_cls = QwenMoeBackbone
     tokenizer_cls = QwenMoeTokenizer