[WIP][Blackwell Kernels] Blackwell group gemm and dense gemms with Python Cutlass #1256

Status: Open. Wants to merge 26 commits into base: main.

Commits (26)
98df179
start pycutlass kernels
lessw2020 Jun 1, 2025
c6e19d3
dense gemm working
lessw2020 Jun 1, 2025
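
Note: the kernels in this branch are written with the CUTLASS Python (CuTe) DSL. For flavor only, a dense GEMM can also be expressed through CUTLASS's high-level Python interface; the sketch below is a stand-in under that assumption (requires the `nvidia-cutlass` package, constructor arguments vary by version, and this is not the PR's code):

```python
# Rough sketch: dense GEMM via CUTLASS's high-level Python interface, D = A @ B + C.
import cutlass  # nvidia-cutlass Python package (assumed installed)
import torch

M, N, K = 512, 256, 128  # illustrative shapes, not the PR's benchmark sizes
A = torch.randn(M, K, device="cuda", dtype=torch.float16)
B = torch.randn(K, N, device="cuda", dtype=torch.float16)
C = torch.zeros(M, N, device="cuda", dtype=torch.float16)
D = torch.zeros_like(C)

# Build a GEMM plan for fp16 row-major operands, then launch it.
plan = cutlass.op.Gemm(element=torch.float16, layout=cutlass.LayoutType.RowMajor)
plan.run(A, B, C, D)
```
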
f042735
start group gemm
lessw2020 Jun 1, 2025
cebe5f8
working group gemm!
lessw2020 Jun 2, 2025
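
Note: a grouped GEMM runs many independent problems, each with its own shapes, in a single launch, which is exactly what MoE expert computation wants. A sketch using the high-level `cutlass.op.GroupedGemm` interface (an API assumption, and a stand-in for the CuTe DSL kernel added here):

```python
# Sketch: grouped GEMM = one launch over independent problems of varying shape.
import cutlass  # nvidia-cutlass Python package (assumed installed)
import torch

# Illustrative problems; each group may have a different M (tokens per expert).
shapes = [(128, 64, 32), (256, 64, 32), (64, 64, 32)]  # (M, N, K) per group
As = [torch.randn(m, k, device="cuda", dtype=torch.float16) for m, n, k in shapes]
Bs = [torch.randn(k, n, device="cuda", dtype=torch.float16) for m, n, k in shapes]
Cs = [torch.zeros(m, n, device="cuda", dtype=torch.float16) for m, n, k in shapes]
Ds = [torch.zeros_like(c) for c in Cs]

plan = cutlass.op.GroupedGemm(element=torch.float16, layout=cutlass.LayoutType.RowMajor)
plan.run(As, Bs, Cs, Ds)  # each Ds[i] = As[i] @ Bs[i] + Cs[i]
```
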
8d11678
just keep the working benchmarks
lessw2020 Jun 3, 2025
1d852d4
add triton_do_bench benchmarking
lessw2020 Jun 3, 2025
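
Note: `triton.testing.do_bench` times a callable with CUDA events, handling warmup and repeated runs for you. A minimal sketch of how such a GEMM benchmark can look (shapes and dtype are illustrative, not taken from this PR):

```python
# Benchmark a matmul with Triton's do_bench; returns mean runtime in ms.
import torch
from triton.testing import do_bench

M, N, K = 4096, 4096, 4096  # illustrative problem size
a = torch.randn(M, K, device="cuda", dtype=torch.bfloat16)
b = torch.randn(K, N, device="cuda", dtype=torch.bfloat16)

ms = do_bench(lambda: a @ b)  # warmup + timed repetitions via CUDA events
tflops = 2 * M * N * K / (ms * 1e-3) / 1e12  # a GEMM does 2*M*N*K flops
print(f"{ms:.3f} ms  ->  {tflops:.1f} TFLOPS")
```
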
47eb705
add persistent dense gemm kernel
lessw2020 Jun 3, 2025
95886d5
make equal comparison via tf32
lessw2020 Jun 4, 2025
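
Note: with the tensor cores computing in TF32, a bit-exact match against an fp32 PyTorch matmul is not expected; putting the reference on the same footing makes the comparison meaningful. A small sketch of that idea (names are illustrative, not the PR's code):

```python
# Compare a kernel's output against a TF32 reference matmul.
import torch

torch.backends.cuda.matmul.allow_tf32 = True  # reference matmul also uses TF32

a = torch.randn(1024, 512, device="cuda")
b = torch.randn(512, 256, device="cuda")
ref = a @ b  # TF32 reference result

kernel_out = a @ b  # placeholder for the Blackwell kernel's output
# TF32 keeps ~10 mantissa bits, so tolerances are looser than fp32 defaults.
torch.testing.assert_close(kernel_out, ref, rtol=1e-3, atol=1e-3)
```
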
c9beca9
add full bench with persistent kernel
lessw2020 Jun 4, 2025
45e22e1
group gemm working with benchmarking
lessw2020 Jun 4, 2025
84b3894
start group gemm integration with ds
lessw2020 Jun 4, 2025
46508c6
bgg integrated, failing on some group sizes
lessw2020 Jun 4, 2025
dd1b229
try padding, but still failing
lessw2020 Jun 4, 2025
eeb440a
add sm100 group scheduler
lessw2020 Jun 4, 2025
5eaea74
add manual looping as default to get running on blackwell
lessw2020 Jun 5, 2025
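
Note: the fallback is conceptually simple: instead of one grouped kernel launch, iterate over experts and run a dense GEMM on each expert's contiguous slice of tokens. A minimal sketch of that routine (names and shapes are illustrative):

```python
# Manual-looping grouped GEMM: one dense matmul per expert group.
import torch

def grouped_mm_by_loop(
    tokens: torch.Tensor,                # (total_tokens, hidden), sorted by expert
    expert_weights: list[torch.Tensor],  # one (hidden, out) matrix per expert
    group_sizes: list[int],              # tokens routed to each expert
) -> torch.Tensor:
    outputs = []
    start = 0
    for w, n in zip(expert_weights, group_sizes):
        if n > 0:  # skip experts that received no tokens
            outputs.append(tokens[start : start + n] @ w)
        start += n
    return torch.cat(outputs, dim=0)
```

This trades the launch efficiency of a true grouped kernel for correctness on hardware where the grouped path is not yet working.
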
55c2175
symm memory + manual looping = blackwell inference working
lessw2020 Jun 5, 2025
ddc40ad
add tensor converter
lessw2020 Jun 5, 2025
3235025
cute dense looping group gemm start
lessw2020 Jun 5, 2025
3a5e52f
cute dense looping group gemm start
lessw2020 Jun 5, 2025
8028659
start dense cute gemm with looping - gate only
lessw2020 Jun 6, 2025
6a883e2
gate cute kernel prepped
lessw2020 Jun 6, 2025
1b90dd0
first cute blackwell python gemm running in deepseek inference!
lessw2020 Jun 6, 2025
ab9701d
use tma_store
lessw2020 Jun 6, 2025
171e526
gate and up now running as dense blackwell cute gemms
lessw2020 Jun 6, 2025
a6e033c
full MoE running on looping dense blackwell gemms
lessw2020 Jun 6, 2025
f2e9159
refine implementation of dense cute gemms
lessw2020 Jun 7, 2025

Files changed (2)
2 changes: 1 addition & 1 deletion run_train.sh
@@ -12,7 +12,7 @@ set -ex
 # LOG_RANK=0,1 NGPU=4 ./run_train.sh
 NGPU=${NGPU:-"8"}
 export LOG_RANK=${LOG_RANK:-0}
-CONFIG_FILE=${CONFIG_FILE:-"./torchtitan/models/llama3/train_configs/debug_model.toml"}
+CONFIG_FILE=${CONFIG_FILE:-"./torchtitan/experiments/llama4/train_configs/debug_model.toml"}

 overrides=""
 if [ $# -ne 0 ]; then
24 changes: 20 additions & 4 deletions torchtitan/experiments/deepseek_v3/generate.py
@@ -8,6 +8,7 @@

 # use inference.sh "Your Question Here?" to run inference with a single prompt.

+import logging
 import sys
 from dataclasses import dataclass
@@ -19,15 +20,26 @@
 from model_config import deepseek_config_registry
 from torch.distributed.device_mesh import DeviceMesh
 from torch.distributed.pipelining import PipelineStage, ScheduleGPipe
-from transformers import AutoTokenizer
+from torchtitan.tools.logging import init_logger, logger

 from torchtitan.tools.utils import Color
+from transformers import AutoTokenizer

 # Uncomment the model you want to run.
 model_id, mesh_shape = "deepseek-ai/DeepSeek-V2-Lite-Chat", (1, 4)
 # model_id, mesh_shape = "deepseek-ai/deepseek-v3", (8, 4)


+def remove_notset_root_handlers():
+    """
+    Remove handlers with level NOTSET from the root logger.
+    Titan's logger has an explicit level set, so its handlers can be
+    distinguished from the NOTSET ones that other libraries attach.
+    """
+    for handler in logger.handlers[:]:
+        if handler.level == logging.NOTSET:
+            logger.removeHandler(handler)
+
+
 def colorize_chat(text, user_color=None, assistant_color=None, output_color=None):
     """Parse and colorize chat output with optional colors for each role."""
     lines = text.split("\n")
@@ -127,7 +139,7 @@ def create_model(dist_config: DistConfig):
     model_args.ep_size = dist_config.ep_size
     model_args.num_stages = dist_config.pp_size
     model_args.stage_idx = dist_config.pp_rank
-    model_args.max_seq_len = 4096  # 16384
+    model_args.max_seq_len = 256  # 16384

     with dist_config.device, dist_config.mesh:
         model = DeepseekForCausalLM(model_args)
@@ -224,7 +236,7 @@ def generate(
     tokenizer,
     dist_config,
     messages: list[dict],
-    n_tokens: int = 200,
+    n_tokens: int = 50,
 ):
     rank = dist.get_rank()
     device = dist_config.device
@@ -353,6 +365,10 @@ generate_with_cuda_graph(


 if __name__ == "__main__":
+    # init_logger()
+    # get rid of HF duplicate logs
+    # remove_notset_root_handlers()
+
     # Get user prompt from command line arguments
     user_prompt = "What is 2+2?"  # Default prompt
     if len(sys.argv) > 1:
@@ -375,7 +391,7 @@ def generate_with_cuda_graph(
     ]

     generate(model, pp_schedule, tokenizer, dist_config, messages)
-    generate_with_cuda_graph(model, tokenizer, dist_config, messages)
+    # generate_with_cuda_graph(model, tokenizer, dist_config, messages)

     if rank == 0:
         print(f"\n{color.yellow}Closing inference mesh...{color.reset}")