
Commit 8a5c1c0

resolving conflicts between training args and optimization args (Azure#3193)
* resolving conflicts between training args and optimization args
* Changing the log text
* black formatting
* adding the pipeline name to the notebook
1 parent df0bfcc commit 8a5c1c0

File tree

1 file changed: +71 −12 lines changed

sdk/python/foundation-models/system/finetune/chat-completion/chat-completion.ipynb

+71 −12
@@ -377,24 +377,84 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "# Training parameters\n",
+    "# Default training parameters\n",
     "training_parameters = dict(\n",
     "    num_train_epochs=3,\n",
     "    per_device_train_batch_size=1,\n",
     "    per_device_eval_batch_size=1,\n",
     "    learning_rate=5e-6,\n",
     "    lr_scheduler_type=\"cosine\",\n",
     ")\n",
-    "print(f\"The following training parameters are enabled - {training_parameters}\")\n",
+    "# Default optimization parameters\n",
+    "optimization_parameters = dict(\n",
+    "    apply_lora=\"true\",\n",
+    "    apply_deepspeed=\"true\",\n",
+    "    deepspeed_stage=2,\n",
+    ")\n",
+    "# Let's construct finetuning parameters using training and optimization paramters.\n",
+    "finetune_parameters = {**training_parameters, **optimization_parameters}\n",
     "\n",
-    "# Optimization parameters - As these parameters are packaged with the model itself, lets retrieve those parameters\n",
+    "# Each model finetuning works best with certain finetuning parameters which are packed with model as `model_specific_defaults`.\n",
+    "# Let's override the finetune_parameters in case the model has some custom defaults.\n",
     "if \"model_specific_defaults\" in foundation_model.tags:\n",
-    "    optimization_parameters = ast.literal_eval(\n",
-    "        foundation_model.tags[\"model_specific_defaults\"]\n",
-    "    )  # convert string to python dict\n",
-    "else:\n",
-    "    optimization_parameters = dict(apply_lora=\"true\", apply_deepspeed=\"true\")\n",
-    "print(f\"The following optimizations are enabled - {optimization_parameters}\")"
+    "    print(\"Warning! Model specific defaults exist. The defaults could be overridden.\")\n",
+    "    finetune_parameters.update(\n",
+    "        ast.literal_eval(  # convert string to python dict\n",
+    "            foundation_model.tags[\"model_specific_defaults\"]\n",
+    "        )\n",
+    "    )\n",
+    "print(\n",
+    "    f\"The following finetune parameters are going to be set for the run: {finetune_parameters}\"\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Set the pipeline display name for distinguishing different runs from the name\n",
+    "def get_pipeline_display_name():\n",
+    "    batch_size = (\n",
+    "        int(finetune_parameters.get(\"per_device_train_batch_size\", 1))\n",
+    "        * int(finetune_parameters.get(\"gradient_accumulation_steps\", 1))\n",
+    "        * int(gpus_per_node)\n",
+    "        * int(finetune_parameters.get(\"num_nodes_finetune\", 1))\n",
+    "    )\n",
+    "    scheduler = finetune_parameters.get(\"lr_scheduler_type\", \"linear\")\n",
+    "    deepspeed = finetune_parameters.get(\"apply_deepspeed\", \"false\")\n",
+    "    ds_stage = finetune_parameters.get(\"deepspeed_stage\", \"2\")\n",
+    "    if deepspeed == \"true\":\n",
+    "        ds_string = f\"ds{ds_stage}\"\n",
+    "    else:\n",
+    "        ds_string = \"nods\"\n",
+    "    lora = finetune_parameters.get(\"apply_lora\", \"false\")\n",
+    "    if lora == \"true\":\n",
+    "        lora_string = \"lora\"\n",
+    "    else:\n",
+    "        lora_string = \"nolora\"\n",
+    "    save_limit = finetune_parameters.get(\"save_total_limit\", -1)\n",
+    "    seq_len = finetune_parameters.get(\"max_seq_length\", -1)\n",
+    "    return (\n",
+    "        model_name\n",
+    "        + \"-\"\n",
+    "        + \"ultrachat\"\n",
+    "        + \"-\"\n",
+    "        + f\"bs{batch_size}\"\n",
+    "        + \"-\"\n",
+    "        + f\"{scheduler}\"\n",
+    "        + \"-\"\n",
+    "        + ds_string\n",
+    "        + \"-\"\n",
+    "        + lora_string\n",
+    "        + f\"-save_limit{save_limit}\"\n",
+    "        + f\"-seqlen{seq_len}\"\n",
+    "    )\n",
+    "\n",
+    "\n",
+    "pipeline_display_name = get_pipeline_display_name()\n",
+    "print(f\"Display name used for the run: {pipeline_display_name}\")"
    ]
   },
   {
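For readers following along outside the notebook, the snippet below is a minimal, self-contained sketch of the merge-then-override flow the new cell introduces. The foundation_model object is mocked with SimpleNamespace (in the notebook it is fetched from the registry earlier), and the model_specific_defaults value shown is an invented example; keys supplied by the model win over the notebook defaults because dict.update applies them last.

import ast
from types import SimpleNamespace

# Mocked model handle -- only the `tags` attribute is used in this sketch.
foundation_model = SimpleNamespace(
    tags={"model_specific_defaults": "{'apply_deepspeed': 'false', 'learning_rate': 2e-5}"}
)

training_parameters = dict(
    num_train_epochs=3,
    per_device_train_batch_size=1,
    per_device_eval_batch_size=1,
    learning_rate=5e-6,
    lr_scheduler_type="cosine",
)
optimization_parameters = dict(apply_lora="true", apply_deepspeed="true", deepspeed_stage=2)

# Merge the two dicts, then let the model-specific defaults (a stringified dict) override.
finetune_parameters = {**training_parameters, **optimization_parameters}
if "model_specific_defaults" in foundation_model.tags:
    finetune_parameters.update(
        ast.literal_eval(foundation_model.tags["model_specific_defaults"])
    )

print(finetune_parameters["apply_deepspeed"])  # 'false' -- the model default wins
print(finetune_parameters["learning_rate"])  # 2e-05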
@@ -413,7 +473,7 @@
     "\n",
     "\n",
     "# define the pipeline job\n",
-    "@pipeline()\n",
+    "@pipeline(name=pipeline_display_name)\n",
     "def create_pipeline():\n",
     "    chat_completion_pipeline = pipeline_component_func(\n",
     "        mlflow_model_path=foundation_model.id,\n",
@@ -430,8 +490,7 @@
     "        ),\n",
     "        # Training settings\n",
     "        number_of_gpu_to_use_finetuning=gpus_per_node,  # set to the number of GPUs available in the compute\n",
-    "        **training_parameters,\n",
-    "        **optimization_parameters\n",
+    "        **finetune_parameters\n",
     "    )\n",
     "    return {\n",
     "        # map the output of the fine tuning job to the output of pipeline job so that we can easily register the fine tuned model\n",
