 "metadata": {},
 "outputs": [],
 "source": [
-"# Training parameters\n",
+"# Default training parameters\n",
 "training_parameters = dict(\n",
 "    num_train_epochs=3,\n",
 "    per_device_train_batch_size=1,\n",
 "    per_device_eval_batch_size=1,\n",
 "    learning_rate=5e-6,\n",
 "    lr_scheduler_type=\"cosine\",\n",
 ")\n",
-"print(f\"The following training parameters are enabled - {training_parameters}\")\n",
+"# Default optimization parameters\n",
+"optimization_parameters = dict(\n",
+"    apply_lora=\"true\",\n",
+"    apply_deepspeed=\"true\",\n",
+"    deepspeed_stage=2,\n",
+")\n",
+"# Let's construct the finetuning parameters using the training and optimization parameters.\n",
+"finetune_parameters = {**training_parameters, **optimization_parameters}\n",
 "\n",
-"# Optimization parameters - As these parameters are packaged with the model itself, lets retrieve those parameters\n",
+"# Each model finetunes best with certain parameters, which are packaged with the model as `model_specific_defaults`.\n",
+"# Let's override finetune_parameters in case the model has custom defaults.\n",
 "if \"model_specific_defaults\" in foundation_model.tags:\n",
-"    optimization_parameters = ast.literal_eval(\n",
-"        foundation_model.tags[\"model_specific_defaults\"]\n",
-"    )  # convert string to python dict\n",
-"else:\n",
-"    optimization_parameters = dict(apply_lora=\"true\", apply_deepspeed=\"true\")\n",
-"print(f\"The following optimizations are enabled - {optimization_parameters}\")"
+"    print(\"Warning! Model-specific defaults exist; they will override the values set above.\")\n",
+"    finetune_parameters.update(\n",
+"        ast.literal_eval(  # convert the string tag value to a python dict\n",
+"            foundation_model.tags[\"model_specific_defaults\"]\n",
+"        )\n",
+"    )\n",
+"print(\n",
+"    f\"The following finetune parameters are going to be set for the run: {finetune_parameters}\"\n",
+")"
+]
+},
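A note on the merge order this cell relies on: in `{**training_parameters, **optimization_parameters}` later keys win, and the subsequent `update()` gives the model's packaged defaults the final say. A minimal standalone sketch of that precedence (the tag value below is hypothetical, not from a real model registry):

```python
import ast

training_parameters = dict(num_train_epochs=3, learning_rate=5e-6)
optimization_parameters = dict(apply_lora="true", apply_deepspeed="true", deepspeed_stage=2)

# {**a, **b}: keys from b silently overwrite duplicates from a.
finetune_parameters = {**training_parameters, **optimization_parameters}

# Tags arrive as strings, so ast.literal_eval safely parses the dict literal.
model_specific_defaults = "{'learning_rate': 2e-5, 'deepspeed_stage': 3}"  # hypothetical tag value
finetune_parameters.update(ast.literal_eval(model_specific_defaults))

print(finetune_parameters["learning_rate"])    # 2e-05 -- the model default wins over 5e-6
print(finetune_parameters["deepspeed_stage"])  # 3
```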
+{
+"cell_type": "code",
+"execution_count": null,
+"metadata": {},
+"outputs": [],
+"source": [
+"# Build a pipeline display name that makes different runs distinguishable by name\n",
+"def get_pipeline_display_name():\n",
+"    batch_size = (\n",
+"        int(finetune_parameters.get(\"per_device_train_batch_size\", 1))\n",
+"        * int(finetune_parameters.get(\"gradient_accumulation_steps\", 1))\n",
+"        * int(gpus_per_node)\n",
+"        * int(finetune_parameters.get(\"num_nodes_finetune\", 1))\n",
+"    )\n",
+"    scheduler = finetune_parameters.get(\"lr_scheduler_type\", \"linear\")\n",
+"    deepspeed = finetune_parameters.get(\"apply_deepspeed\", \"false\")\n",
+"    ds_stage = finetune_parameters.get(\"deepspeed_stage\", \"2\")\n",
+"    if deepspeed == \"true\":\n",
+"        ds_string = f\"ds{ds_stage}\"\n",
+"    else:\n",
+"        ds_string = \"nods\"\n",
+"    lora = finetune_parameters.get(\"apply_lora\", \"false\")\n",
+"    if lora == \"true\":\n",
+"        lora_string = \"lora\"\n",
+"    else:\n",
+"        lora_string = \"nolora\"\n",
+"    save_limit = finetune_parameters.get(\"save_total_limit\", -1)\n",
+"    seq_len = finetune_parameters.get(\"max_seq_length\", -1)\n",
+"    return (\n",
+"        model_name\n",
+"        + \"-\"\n",
+"        + \"ultrachat\"\n",
+"        + \"-\"\n",
+"        + f\"bs{batch_size}\"\n",
+"        + \"-\"\n",
+"        + f\"{scheduler}\"\n",
+"        + \"-\"\n",
+"        + ds_string\n",
+"        + \"-\"\n",
+"        + lora_string\n",
+"        + f\"-save_limit{save_limit}\"\n",
+"        + f\"-seqlen{seq_len}\"\n",
+"    )\n",
+"\n",
+"\n",
+"pipeline_display_name = get_pipeline_display_name()\n",
+"print(f\"Display name used for the run: {pipeline_display_name}\")"
 ]
 },
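As a quick sanity check of the naming scheme: with the defaults set above (cosine scheduler, DeepSpeed stage 2, LoRA on, no `save_total_limit` or `max_seq_length`), the name reduces to a predictable string. Both `model_name` and `gpus_per_node` below are hypothetical stand-ins for values resolved earlier in the notebook:

```python
model_name = "my-model"  # hypothetical; resolved earlier in the notebook
gpus_per_node = 8        # hypothetical; depends on the chosen compute SKU

print(get_pipeline_display_name())
# my-model-ultrachat-bs8-cosine-ds2-lora-save_limit-1-seqlen-1
```

The `bs8` term is the effective batch size: per-device batch size x gradient accumulation steps x GPUs per node x nodes = 1 x 1 x 8 x 1.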
 {
@@ ... @@
 "\n",
 "\n",
 "# define the pipeline job\n",
-"@pipeline()\n",
+"@pipeline(name=pipeline_display_name)\n",
 "def create_pipeline():\n",
 "    chat_completion_pipeline = pipeline_component_func(\n",
 "        mlflow_model_path=foundation_model.id,\n",
@@ ... @@
 "        ),\n",
 "        # Training settings\n",
 "        number_of_gpu_to_use_finetuning=gpus_per_node,  # set to the number of GPUs available in the compute\n",
-"        **training_parameters,\n",
-"        **optimization_parameters\n",
+"        **finetune_parameters\n",
 "    )\n",
 "    return {\n",
 "        # map the output of the fine tuning job to the output of pipeline job so that we can easily register the fine tuned model\n",
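One practical reason to merge the dicts before this call, beyond tidiness: splatting two dicts that share a key fails at call time, while merging resolves the duplicate up front. A small sketch of the difference, using a generic function rather than the actual pipeline component:

```python
def component(**kwargs):
    return kwargs

a = {"learning_rate": 5e-6}
b = {"learning_rate": 2e-5}  # e.g. a model-specific default

print(component(**{**a, **b}))  # {'learning_rate': 2e-05} -- later dict wins
try:
    component(**a, **b)         # duplicate keyword across two splats
except TypeError as e:
    print(e)                    # got multiple values for keyword argument 'learning_rate'
```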