
Commit 84e910f

Merge branch 'main' into fix/llama-cpp-install

2 parents 498647c + 4b26ed0

10 files changed: 66 additions, 43 deletions

transformerlab/plugins/autotrain_sft_trainer/index.json

Lines changed: 3 additions & 1 deletion
@@ -4,14 +4,16 @@
   "description": "SFT training using Huggingface autotrain",
   "plugin-format": "python",
   "type": "trainer",
-  "version": "0.1.10",
+  "version": "0.1.11",
   "model_architectures": [
     "LlamaForCausalLM",
     "MistralForCausalLM",
     "MixtralForCausalLM",
     "PhiForCausalLM",
     "GemmaForCausalLM",
     "Qwen2ForCausalLM",
+    "Qwen3ForCausalLM",
+    "Qwen3MoeForCausalLM",
     "Phi3ForCausalLM"
   ],
   "supported_hardware_architectures": ["cuda"],

transformerlab/plugins/dpo_orpo_simpo_trainer_llama_factory/index.json

Lines changed: 3 additions & 1 deletion
@@ -4,14 +4,16 @@
   "description": "An implementation of several Preference Optimization methods using Llama Factory.",
   "plugin-format": "python",
   "type": "trainer",
-  "version": "0.0.8",
+  "version": "0.0.9",
   "model_architectures": [
     "LlamaForCausalLM",
     "MistralForCausalLM",
     "MixtralForCausalLM",
     "PhiForCausalLM",
     "GemmaForCausalLM",
     "Qwen2ForCausalLM",
+    "Qwen3ForCausalLM",
+    "Qwen3MoeForCausalLM",
     "Phi3ForCausalLM"
   ],
   "supported_hardware_architectures": ["cuda"],

transformerlab/plugins/grpo_trainer_multi_gpu/index.json

Lines changed: 2 additions & 2 deletions
@@ -4,8 +4,8 @@
   "description": "A GPRO trainer based using a Multi GPU setup trained using TRL and Accelerate",
   "plugin-format": "python",
   "type": "trainer",
-  "version": "0.0.4",
-  "model_architectures": ["LlamaForCausalLM", "Qwen2ForCausalLM"],
+  "version": "0.0.5",
+  "model_architectures": ["LlamaForCausalLM", "Qwen2ForCausalLM", "Qwen3ForCausalLM", "Qwen3MoeForCausalLM"],
   "git": "",
   "url": "",
   "files": ["main.py", "setup.sh"],

transformerlab/plugins/grpo_trainer_multi_gpu/main.py

Lines changed: 5 additions & 0 deletions
@@ -4,18 +4,23 @@
 import subprocess
 
 from transformerlab.sdk.v1.train import tlab_trainer
+from transformerlab.plugin import get_python_executable
 
 # Add custom arguments
 tlab_trainer.add_argument(
     "--launched_with_accelerate", action="store_true", help="Flag to prevent recursive subprocess launching"
 )
 
 
+
 def setup_accelerate_environment():
     """Set up the environment for the accelerate launch subprocess"""
     current_dir = os.path.dirname(os.path.abspath(__file__))
+    plugin_dir = os.path.dirname(os.path.realpath(__file__))
     api_dir = os.path.abspath(os.path.join(current_dir, "../../.."))
     env = os.environ.copy()
+    python_executable = get_python_executable(plugin_dir)
+    env["PATH"] = python_executable.replace("/python", ":") + env["PATH"]
     tlab_source_dir = os.environ.get("_TFL_SOURCE_CODE_DIR")
     python_path = env.get("PYTHONPATH", "")
     paths_to_include = [api_dir]
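
The new PATH line is a small string trick: assuming get_python_executable returns an interpreter path ending in /python (for example <plugin_dir>/venv/bin/python), replacing that suffix with ":" yields the venv's bin directory plus a path separator, so prepending the result to PATH makes the accelerate subprocess resolve python and accelerate from the plugin's own environment first. A minimal sketch of that behavior under the same assumption:

import os

def prepend_interpreter_bin(python_executable: str, path: str) -> str:
    # ".../venv/bin/python".replace("/python", ":") -> ".../venv/bin:"
    # which, prefixed to PATH, wins the lookup for python and accelerate.
    return python_executable.replace("/python", ":") + path

# Hypothetical interpreter path, for illustration only.
exe = "/workspace/plugins/grpo_trainer_multi_gpu/venv/bin/python"
print(prepend_interpreter_bin(exe, os.environ.get("PATH", "")))
# -> /workspace/plugins/grpo_trainer_multi_gpu/venv/bin:/usr/bin:...
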
transformerlab/plugins/grpo_trainer_multi_gpu/setup.sh

Lines changed: 1 addition & 1 deletion

@@ -1,4 +1,4 @@
 #!/usr/bin/env bash
 #pip install "datasets==2.9.0" "accelerate==0.21.0" "evaluate==0.4.0" loralib
-uv pip install trl accelerate
+uv pip install trl bitsandbytes accelerate
 #pip install rouge-score tensorboard py7zr
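
Adding bitsandbytes alongside trl and accelerate suggests the trainer wants quantized model loading available; how the plugin actually uses it is not shown in this diff, so the snippet below is only a sketch of the kind of 4-bit load the dependency enables, using the standard transformers API and an example checkpoint:

import torch
from transformers import AutoModelForCausalLM, BitsAndBytesConfig

# Illustrative 4-bit (QLoRA-style) load enabled by bitsandbytes;
# not taken from this plugin's code.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16,
)
model = AutoModelForCausalLM.from_pretrained(
    "Qwen/Qwen3-0.6B",  # example model ID; any supported causal LM works
    quantization_config=bnb_config,
    device_map="auto",
)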

transformerlab/plugins/llama_trainer/index.json

Lines changed: 7 additions & 15 deletions
@@ -4,10 +4,12 @@
   "description": "A training script adapted from https://www.philschmid.de/instruction-tune-llama-2 for training Llama2 using PeFT",
   "plugin-format": "python",
   "type": "trainer",
-  "version": "1.0.22",
+  "version": "1.0.23",
   "model_architectures": [
     "LlamaForCausalLM",
     "Qwen2ForCausalLM",
+    "Qwen3ForCausalLM",
+    "Qwen3MoeForCausalLM",
     "GemmaForCausalLM",
     "Gemma2ForCausalLM",
     "Gemma3ForCausalLM",
@@ -22,13 +24,8 @@
   ],
   "git": "",
   "url": "",
-  "files": [
-    "main.py",
-    "setup.sh"
-  ],
-  "supported_hardware_architectures": [
-    "cuda"
-  ],
+  "files": ["main.py", "setup.sh"],
+  "supported_hardware_architectures": ["cuda"],
   "setup-script": "setup.sh",
   "parameters": {
     "maximum_sequence_length": {
@@ -48,12 +45,7 @@
     "learning_rate_schedule": {
       "title": "Learning Rate Schedule",
       "type": "string",
-      "enum": [
-        "constant",
-        "linear",
-        "cosine",
-        "constant_with_warmup"
-      ],
+      "enum": ["constant", "linear", "cosine", "constant_with_warmup"],
       "default": "constant"
     },
     "learning_rate": {
@@ -137,4 +129,4 @@
       "ui:help": "This will create a new fused model with the adaptor and the model merged. A separate entry will be created in the model zoo for the fused model."
     }
   }
-}
+}

transformerlab/plugins/llama_trainer_multi_gpu/index.json

Lines changed: 33 additions & 17 deletions
@@ -4,32 +4,43 @@
   "description": "A training script adapted from https://www.philschmid.de/instruction-tune-llama-2 for training Llama2 using PeFT",
   "plugin-format": "python",
   "type": "trainer",
-  "version": "0.1.7",
+  "version": "0.1.9",
   "model_architectures": [
-    "LlamaForCausalLM",
-    "Qwen2ForCausalLM",
-    "GemmaForCausalLM",
-    "Gemma2ForCausalLM",
-    "Gemma3ForCausalLM",
-    "Gemma3ForConditionalGeneration",
-    "AprielForCausalLM",
-    "ExaoneForCausalLM",
-    "PhiForCausalLM",
-    "Phi3ForCausalLM",
-    "MistralForCausalLM",
+    "LlamaForCausalLM",
+    "Qwen2ForCausalLM",
+    "Qwen3ForCausalLM",
+    "Qwen3MoeForCausalLM",
+    "GemmaForCausalLM",
+    "Gemma2ForCausalLM",
+    "Gemma3ForCausalLM",
+    "Gemma3ForConditionalGeneration",
+    "AprielForCausalLM",
+    "ExaoneForCausalLM",
+    "PhiForCausalLM",
+    "Phi3ForCausalLM",
+    "MistralForCausalLM",
     "MixtralForCausalLM"
   ],
   "git": "",
   "url": "",
-  "files": ["main.py", "setup.sh"],
-  "supported_hardware_architectures": ["cuda"],
+  "files": [
+    "main.py",
+    "setup.sh"
+  ],
+  "supported_hardware_architectures": [
+    "cuda"
+  ],
   "setup-script": "setup.sh",
   "parameters": {
     "train_device": {
       "title": "Training Device",
       "type": "string",
       "required": true,
-      "enum": ["cuda", "cpu", "tpu"],
+      "enum": [
+        "cuda",
+        "cpu",
+        "tpu"
+      ],
       "default": "cuda"
     },
     "gpu_ids": {
@@ -54,7 +65,12 @@
     "learning_rate_schedule": {
       "title": "Learning Rate Schedule",
       "type": "string",
-      "enum": ["constant", "linear", "cosine", "constant_with_warmup"],
+      "enum": [
+        "constant",
+        "linear",
+        "cosine",
+        "constant_with_warmup"
+      ],
       "default": "constant"
     },
     "learning_rate": {
@@ -148,4 +164,4 @@
       "ui:help": "This will create a new fused model with the adaptor and the model merged. A separate entry will be created in the model zoo for the fused model."
     }
   }
-}
+}

transformerlab/plugins/reward_modeling_llama_factory/index.json

Lines changed: 4 additions & 2 deletions
@@ -4,15 +4,17 @@
   "description": "An implementation of RLHF (Reward Modeling) using Llama Factory.",
   "plugin-format": "python",
   "type": "trainer",
-  "version": "0.0.6",
+  "version": "0.0.7",
   "model_architectures": [
     "GemmaForCausalLM",
     "LlamaForCausalLM",
     "MistralForCausalLM",
     "MixtralForCausalLM",
     "PhiForCausalLM",
     "Phi3ForCausalLM",
-    "Qwen2ForCausalLM"
+    "Qwen2ForCausalLM",
+    "Qwen3ForCausalLM",
+    "Qwen3MoeForCausalLM"
   ],
   "supported_hardware_architectures": [
     "cuda"

transformerlab/plugins/sft_llama_factory/index.json

Lines changed: 4 additions & 2 deletions
@@ -4,15 +4,17 @@
   "description": "An implementation of Supervised Finetuning using Llama Factory.",
   "plugin-format": "python",
   "type": "trainer",
-  "version": "0.0.9",
+  "version": "0.0.10",
   "model_architectures": [
     "GemmaForCausalLM",
     "LlamaForCausalLM",
     "MistralForCausalLM",
     "MixtralForCausalLM",
     "PhiForCausalLM",
     "Phi3ForCausalLM",
-    "Qwen2ForCausalLM"
+    "Qwen2ForCausalLM",
+    "Qwen3ForCausalLM",
+    "Qwen3MoeForCausalLM"
   ],
   "supported_hardware_architectures": [
     "cuda"

transformerlab/plugins/unsloth_grpo_trainer/index.json

Lines changed: 4 additions & 2 deletions
@@ -4,11 +4,13 @@
   "description": "A GPRO trainer based on the unsloth grpo training notebooks",
   "plugin-format": "python",
   "type": "trainer",
-  "version": "0.0.5",
+  "version": "0.0.6",
   "model_architectures": [
     "LlamaForCausalLM",
     "Qwen2ForCausalLM",
-    "GraniteForCausalLM"
+    "GraniteForCausalLM",
+    "Qwen3ForCausalLM",
+    "Qwen3MoeForCausalLM"
   ],
   "supported_hardware_architectures": [
     "cuda"
