Skip to content

Commit 52eacce

Browse files
vertex-sdk-bot authored and copybara-github committed
feat: Add autoscaling_target_pubsub_num_undelivered_messages option in Preview model deployment on Endpoint & Model classes.
PiperOrigin-RevId: 792342163
1 parent f6ad3bc commit 52eacce

File tree

4 files changed

+280
-6
lines changed

4 files changed

+280
-6
lines changed

google/cloud/aiplatform/models.py

Lines changed: 80 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -1364,6 +1364,8 @@ def deploy(
13641364
autoscaling_target_cpu_utilization: Optional[int] = None,
13651365
autoscaling_target_accelerator_duty_cycle: Optional[int] = None,
13661366
autoscaling_target_request_count_per_minute: Optional[int] = None,
1367+
autoscaling_target_pubsub_num_undelivered_messages: Optional[int] = None,
1368+
autoscaling_pubsub_subscription_labels: Optional[Dict[str, str]] = None,
13671369
enable_access_logging=False,
13681370
disable_container_logging: bool = False,
13691371
deployment_resource_pool: Optional[DeploymentResourcePool] = None,
@@ -1460,6 +1462,12 @@ def deploy(
14601462
autoscaling_target_request_count_per_minute (int):
14611463
Optional. The target number of requests per minute for autoscaling.
14621464
If set, the model will be scaled based on the number of requests it receives.
1465+
autoscaling_target_pubsub_num_undelivered_messages (int):
1466+
Optional. The target number of pubsub undelivered messages for autoscaling.
1467+
If set, the model will be scaled based on the pubsub queue size.
1468+
autoscaling_pubsub_subscription_labels (Dict[str, str]):
1469+
Optional. Monitored resource labels as key value pairs for
1470+
metric filtering for pubsub_num_undelivered_messages.
14631471
enable_access_logging (bool):
14641472
Whether to enable endpoint access logging. Defaults to False.
14651473
disable_container_logging (bool):
@@ -1541,6 +1549,8 @@ def deploy(
15411549
autoscaling_target_cpu_utilization=autoscaling_target_cpu_utilization,
15421550
autoscaling_target_accelerator_duty_cycle=autoscaling_target_accelerator_duty_cycle,
15431551
autoscaling_target_request_count_per_minute=autoscaling_target_request_count_per_minute,
1552+
autoscaling_target_pubsub_num_undelivered_messages=autoscaling_target_pubsub_num_undelivered_messages,
1553+
autoscaling_pubsub_subscription_labels=autoscaling_pubsub_subscription_labels,
15441554
spot=spot,
15451555
enable_access_logging=enable_access_logging,
15461556
disable_container_logging=disable_container_logging,
@@ -1574,6 +1584,8 @@ def _deploy(
15741584
autoscaling_target_cpu_utilization: Optional[int] = None,
15751585
autoscaling_target_accelerator_duty_cycle: Optional[int] = None,
15761586
autoscaling_target_request_count_per_minute: Optional[int] = None,
1587+
autoscaling_target_pubsub_num_undelivered_messages: Optional[int] = None,
1588+
autoscaling_pubsub_subscription_labels: Optional[Dict[str, str]] = None,
15771589
spot: bool = False,
15781590
enable_access_logging=False,
15791591
disable_container_logging: bool = False,
@@ -1673,6 +1685,12 @@ def _deploy(
16731685
autoscaling_target_request_count_per_minute (int):
16741686
Optional. The target number of requests per minute for autoscaling.
16751687
If set, the model will be scaled based on the number of requests it receives.
1688+
autoscaling_target_pubsub_num_undelivered_messages (int):
1689+
Optional. The target number of pubsub undelivered messages for autoscaling.
1690+
If set, the model will be scaled based on the pubsub queue size.
1691+
autoscaling_pubsub_subscription_labels (Dict[str, str]):
1692+
Optional. Monitored resource labels as key value pairs for
1693+
metric filtering for pubsub_num_undelivered_messages.
16761694
spot (bool):
16771695
Optional. Whether to schedule the deployment workload on spot VMs.
16781696
enable_access_logging (bool):
@@ -1731,6 +1749,8 @@ def _deploy(
17311749
autoscaling_target_cpu_utilization=autoscaling_target_cpu_utilization,
17321750
autoscaling_target_accelerator_duty_cycle=autoscaling_target_accelerator_duty_cycle,
17331751
autoscaling_target_request_count_per_minute=autoscaling_target_request_count_per_minute,
1752+
autoscaling_target_pubsub_num_undelivered_messages=autoscaling_target_pubsub_num_undelivered_messages,
1753+
autoscaling_pubsub_subscription_labels=autoscaling_pubsub_subscription_labels,
17341754
spot=spot,
17351755
enable_access_logging=enable_access_logging,
17361756
disable_container_logging=disable_container_logging,
@@ -1771,6 +1791,8 @@ def _deploy_call(
17711791
autoscaling_target_cpu_utilization: Optional[int] = None,
17721792
autoscaling_target_accelerator_duty_cycle: Optional[int] = None,
17731793
autoscaling_target_request_count_per_minute: Optional[int] = None,
1794+
autoscaling_target_pubsub_num_undelivered_messages: Optional[int] = None,
1795+
autoscaling_pubsub_subscription_labels: Optional[Dict[str, str]] = None,
17741796
spot: bool = False,
17751797
enable_access_logging=False,
17761798
disable_container_logging: bool = False,
@@ -1876,6 +1898,11 @@ def _deploy_call(
18761898
A default value of 60 will be used if not specified.
18771899
autoscaling_target_request_count_per_minute (int):
18781900
Optional. Target request count per minute per instance.
1901+
autoscaling_target_pubsub_num_undelivered_messages (int):
1902+
Optional. Target pubsub queue size per instance.
1903+
autoscaling_pubsub_subscription_labels (Dict[str, str]):
1904+
Optional. Monitored resource labels as key value pairs for
1905+
metric filtering for pubsub_num_undelivered_messages.
18791906
spot (bool):
18801907
Optional. Whether to schedule the deployment workload on spot VMs.
18811908
enable_access_logging (bool):
@@ -1946,6 +1973,8 @@ def _deploy_call(
19461973
or autoscaling_target_accelerator_duty_cycle
19471974
or autoscaling_target_cpu_utilization
19481975
or autoscaling_target_request_count_per_minute
1976+
or autoscaling_target_pubsub_num_undelivered_messages
1977+
or autoscaling_pubsub_subscription_labels
19491978
)
19501979

19511980
if provided_custom_machine_spec:
@@ -1954,7 +1983,9 @@ def _deploy_call(
19541983
"The machine_type, accelerator_type and accelerator_count, "
19551984
"autoscaling_target_accelerator_duty_cycle, "
19561985
"autoscaling_target_cpu_utilization, "
1957-
"autoscaling_target_request_count_per_minute parameters "
1986+
"autoscaling_target_request_count_per_minute, "
1987+
"autoscaling_target_pubsub_num_undelivered_messages, "
1988+
"autoscaling_pubsub_subscription_labels parameters "
19581989
"may not be set when `deployment_resource_pool` is "
19591990
"specified."
19601991
)
@@ -2008,6 +2039,8 @@ def _deploy_call(
20082039
or autoscaling_target_accelerator_duty_cycle
20092040
or autoscaling_target_cpu_utilization
20102041
or autoscaling_target_request_count_per_minute
2042+
or autoscaling_target_pubsub_num_undelivered_messages
2043+
or autoscaling_pubsub_subscription_labels
20112044
)
20122045

20132046
# If the model supports both automatic and dedicated deployment resources,
@@ -2022,7 +2055,9 @@ def _deploy_call(
20222055
"The machine_type, accelerator_type and accelerator_count, "
20232056
"autoscaling_target_accelerator_duty_cycle, "
20242057
"autoscaling_target_cpu_utilization, "
2025-
"autoscaling_target_request_count_per_minute parameters "
2058+
"autoscaling_target_request_count_per_minute, "
2059+
"autoscaling_target_pubsub_num_undelivered_messages, "
2060+
"autoscaling_pubsub_subscription_labels parameters "
20262061
"are ignored."
20272062
)
20282063

@@ -2079,6 +2114,19 @@ def _deploy_call(
20792114
[autoscaling_metric_spec]
20802115
)
20812116

2117+
if autoscaling_target_pubsub_num_undelivered_messages:
2118+
autoscaling_metric_spec = gca_machine_resources_compat.AutoscalingMetricSpec(
2119+
metric_name=(
2120+
"pubsub.googleapis.com/subscription/"
2121+
"num_undelivered_messages"
2122+
),
2123+
target=autoscaling_target_pubsub_num_undelivered_messages,
2124+
monitored_resource_labels=autoscaling_pubsub_subscription_labels,
2125+
)
2126+
dedicated_resources.autoscaling_metric_specs.extend(
2127+
[autoscaling_metric_spec]
2128+
)
2129+
20822130
if reservation_affinity_type:
20832131
machine_spec.reservation_affinity = utils.get_reservation_affinity(
20842132
reservation_affinity_type,
@@ -4399,6 +4447,8 @@ def deploy(
43994447
autoscaling_target_cpu_utilization: Optional[int] = None,
44004448
autoscaling_target_accelerator_duty_cycle: Optional[int] = None,
44014449
autoscaling_target_request_count_per_minute: Optional[int] = None,
4450+
autoscaling_target_pubsub_num_undelivered_messages: Optional[int] = None,
4451+
autoscaling_pubsub_subscription_labels: Optional[Dict[str, str]] = None,
44024452
) -> None:
44034453
"""Deploys a Model to the PrivateEndpoint.
44044454
@@ -4575,6 +4625,8 @@ def deploy(
45754625
autoscaling_target_cpu_utilization=autoscaling_target_cpu_utilization,
45764626
autoscaling_target_accelerator_duty_cycle=autoscaling_target_accelerator_duty_cycle,
45774627
autoscaling_target_request_count_per_minute=autoscaling_target_request_count_per_minute,
4628+
autoscaling_target_pubsub_num_undelivered_messages=autoscaling_target_pubsub_num_undelivered_messages,
4629+
autoscaling_pubsub_subscription_labels=autoscaling_pubsub_subscription_labels,
45784630
)
45794631

45804632
def update(
@@ -5647,6 +5699,8 @@ def deploy(
56475699
autoscaling_target_cpu_utilization: Optional[int] = None,
56485700
autoscaling_target_accelerator_duty_cycle: Optional[int] = None,
56495701
autoscaling_target_request_count_per_minute: Optional[int] = None,
5702+
autoscaling_target_pubsub_num_undelivered_messages: Optional[int] = None,
5703+
autoscaling_pubsub_subscription_labels: Optional[Dict[str, str]] = None,
56505704
enable_access_logging=False,
56515705
disable_container_logging: bool = False,
56525706
private_service_connect_config: Optional[
@@ -5765,6 +5819,12 @@ def deploy(
57655819
autoscaling_target_request_count_per_minute (int):
57665820
Optional. The target number of requests per minute for autoscaling.
57675821
If set, the model will be scaled based on the number of requests it receives.
5822+
autoscaling_target_pubsub_num_undelivered_messages (int):
5823+
Optional. The target number of pubsub undelivered messages for autoscaling.
5824+
If set, the model will be scaled based on the pubsub queue size.
5825+
autoscaling_pubsub_subscription_labels (Dict[str, str]):
5826+
Optional. Monitored resource labels as key value pairs for
5827+
metric filtering for pubsub_num_undelivered_messages.
57685828
enable_access_logging (bool):
57695829
Whether to enable endpoint access logging. Defaults to False.
57705830
disable_container_logging (bool):
@@ -5818,8 +5878,12 @@ def deploy(
58185878
autoscaling_target_request_count_per_minute (int):
58195879
Optional. The target number of requests per minute for autoscaling.
58205880
If set, the model will be scaled based on the number of requests it receives.
5821-
available_replica_count reaches required_replica_count, and the
5822-
rest of the replicas will be retried.
5881+
autoscaling_target_pubsub_num_undelivered_messages (int):
5882+
Optional. The target number of pubsub undelivered messages for autoscaling.
5883+
If set, the model will be scaled based on the pubsub queue size.
5884+
autoscaling_pubsub_subscription_labels (Dict[str, str]):
5885+
Optional. Monitored resource labels as key value pairs for
5886+
metric filtering for pubsub_num_undelivered_messages.
58235887
58245888
Returns:
58255889
endpoint (Union[Endpoint, PrivateEndpoint]):
@@ -5885,6 +5949,8 @@ def deploy(
58855949
autoscaling_target_cpu_utilization=autoscaling_target_cpu_utilization,
58865950
autoscaling_target_accelerator_duty_cycle=autoscaling_target_accelerator_duty_cycle,
58875951
autoscaling_target_request_count_per_minute=autoscaling_target_request_count_per_minute,
5952+
autoscaling_target_pubsub_num_undelivered_messages=autoscaling_target_pubsub_num_undelivered_messages,
5953+
autoscaling_pubsub_subscription_labels=autoscaling_pubsub_subscription_labels,
58885954
spot=spot,
58895955
enable_access_logging=enable_access_logging,
58905956
disable_container_logging=disable_container_logging,
@@ -5928,6 +5994,8 @@ def _deploy(
59285994
autoscaling_target_cpu_utilization: Optional[int] = None,
59295995
autoscaling_target_accelerator_duty_cycle: Optional[int] = None,
59305996
autoscaling_target_request_count_per_minute: Optional[int] = None,
5997+
autoscaling_target_pubsub_num_undelivered_messages: Optional[int] = None,
5998+
autoscaling_pubsub_subscription_labels: Optional[Dict[str, str]] = None,
59315999
spot: bool = False,
59326000
enable_access_logging=False,
59336001
disable_container_logging: bool = False,
@@ -6048,6 +6116,12 @@ def _deploy(
60486116
autoscaling_target_request_count_per_minute (int):
60496117
Optional. The target number of requests per minute for autoscaling.
60506118
If set, the model will be scaled based on the number of requests it receives.
6119+
autoscaling_target_pubsub_num_undelivered_messages (int):
6120+
Optional. The target number of pubsub undelivered messages for autoscaling.
6121+
If set, the model will be scaled based on the pubsub queue size.
6122+
autoscaling_pubsub_subscription_labels (Dict[str, str]):
6123+
Optional. Monitored resource labels as key value pairs for
6124+
metric filtering for pubsub_num_undelivered_messages.
60516125
spot (bool):
60526126
Optional. Whether to schedule the deployment workload on spot VMs.
60536127
enable_access_logging (bool):
@@ -6137,6 +6211,8 @@ def _deploy(
61376211
autoscaling_target_cpu_utilization=autoscaling_target_cpu_utilization,
61386212
autoscaling_target_accelerator_duty_cycle=autoscaling_target_accelerator_duty_cycle,
61396213
autoscaling_target_request_count_per_minute=autoscaling_target_request_count_per_minute,
6214+
autoscaling_target_pubsub_num_undelivered_messages=autoscaling_target_pubsub_num_undelivered_messages,
6215+
autoscaling_pubsub_subscription_labels=autoscaling_pubsub_subscription_labels,
61406216
spot=spot,
61416217
enable_access_logging=enable_access_logging,
61426218
disable_container_logging=disable_container_logging,

0 commit comments

Comments
 (0)