feat: Integration with Vertex AI Multimodal Datasets: Update train_dataset and validation_dataset in sft.train() docstring to include the Vertex Multimodal Dataset as a dataset source option.

Frances Hubis Thoma · copybara-github · commit e051a96005fd · 2025-05-27T00:44:54.000-07:00
PiperOrigin-RevId: 761584492
diff --git a/google/cloud/aiplatform_v1/types/tuning_job.py b/google/cloud/aiplatform_v1/types/tuning_job.py
@@ -529,13 +529,9 @@ class SupervisedTuningSpec(proto.Message):
 
     Attributes:
         training_dataset_uri (str):
-            Required. Cloud Storage path to file
-            containing training dataset for tuning. The
-            dataset must be formatted as a JSONL file.
+            Required. Training dataset used for tuning. The dataset can be specified as either a Cloud Storage path to a JSONL file or as the resource name of a Vertex Multimodal Dataset.
         validation_dataset_uri (str):
-            Optional. Cloud Storage path to file
-            containing validation dataset for tuning. The
-            dataset must be formatted as a JSONL file.
+            Optional. Validation dataset used for tuning. The dataset can be specified as either a Cloud Storage path to a JSONL file or as the resource name of a Vertex Multimodal Dataset.
         hyper_parameters (google.cloud.aiplatform_v1.types.SupervisedHyperParameters):
             Optional. Hyperparameters for SFT.
         export_last_checkpoint_only (bool):
diff --git a/google/cloud/aiplatform_v1beta1/types/tuning_job.py b/google/cloud/aiplatform_v1beta1/types/tuning_job.py
@@ -764,14 +764,10 @@ class SupervisedTuningSpec(proto.Message):
 
     Attributes:
         training_dataset_uri (str):
-            Required. Cloud Storage path to file
-            containing training dataset for tuning. The
-            dataset must be formatted as a JSONL file.
+            Required. Training dataset used for tuning. The dataset can be specified as either a Cloud Storage path to a JSONL file or as the resource name of a Vertex Multimodal Dataset.
         validation_dataset_uri (str):
-            Optional. Cloud Storage path to file
-            containing validation dataset for tuning. The
-            dataset must be formatted as a JSONL file.
+            Optional. Validation dataset used for tuning. The dataset can be specified as either a Cloud Storage path to a JSONL file or as the resource name of a Vertex Multimodal Dataset.
         hyper_parameters (google.cloud.aiplatform_v1beta1.types.SupervisedHyperParameters):
             Optional. Hyperparameters for SFT.
         export_last_checkpoint_only (bool):
             Optional. If set to true, disable
diff --git a/vertexai/tuning/_supervised_tuning.py b/vertexai/tuning/_supervised_tuning.py
@@ -38,10 +38,8 @@ def train(
 
     Args:
         source_model (str): Model name for tuning, e.g., "gemini-1.0-pro-002".
-        train_dataset: Cloud Storage path to file containing training dataset for
-          tuning. The dataset should be in JSONL format.
-        validation_dataset: Cloud Storage path to file containing validation
-          dataset for tuning. The dataset should be in JSONL format.
+        train_dataset: Training dataset used for tuning. The dataset can be specified as either a Cloud Storage path to a JSONL file or as the resource name of a Vertex Multimodal Dataset.
+        validation_dataset: Validation dataset used for tuning. The dataset can be specified as either a Cloud Storage path to a JSONL file or as the resource name of a Vertex Multimodal Dataset.
         tuned_model_display_name: The display name of the
           [TunedModel][google.cloud.aiplatform.v1.Model]. The name can be up to
           128 characters long and can consist of any UTF-8 characters.