@@ -1364,6 +1364,8 @@ def deploy(
13641364 autoscaling_target_cpu_utilization : Optional [int ] = None ,
13651365 autoscaling_target_accelerator_duty_cycle : Optional [int ] = None ,
13661366 autoscaling_target_request_count_per_minute : Optional [int ] = None ,
1367+ autoscaling_target_pubsub_num_undelivered_messages : Optional [int ] = None ,
1368+ autoscaling_pubsub_subscription_labels : Optional [Dict [str , str ]] = None ,
13671369 enable_access_logging = False ,
13681370 disable_container_logging : bool = False ,
13691371 deployment_resource_pool : Optional [DeploymentResourcePool ] = None ,
@@ -1460,6 +1462,12 @@ def deploy(
14601462 autoscaling_target_request_count_per_minute (int):
14611463 Optional. The target number of requests per minute for autoscaling.
14621464 If set, the model will be scaled based on the number of requests it receives.
1465+ autoscaling_target_pubsub_num_undelivered_messages (int):
1466+ Optional. The target number of pubsub undelivered messages for autoscaling.
1467+ If set, the model will be scaled based on the pubsub queue size.
1468+ autoscaling_pubsub_subscription_labels (Dict[str, str]):
1469+ Optional. Monitored resource labels, as key-value pairs, used to filter the
1470+ pubsub_num_undelivered_messages metric. Only takes effect when the target above is set.
14631471 enable_access_logging (bool):
14641472 Whether to enable endpoint access logging. Defaults to False.
14651473 disable_container_logging (bool):
@@ -1541,6 +1549,8 @@ def deploy(
15411549 autoscaling_target_cpu_utilization = autoscaling_target_cpu_utilization ,
15421550 autoscaling_target_accelerator_duty_cycle = autoscaling_target_accelerator_duty_cycle ,
15431551 autoscaling_target_request_count_per_minute = autoscaling_target_request_count_per_minute ,
1552+ autoscaling_target_pubsub_num_undelivered_messages = autoscaling_target_pubsub_num_undelivered_messages ,
1553+ autoscaling_pubsub_subscription_labels = autoscaling_pubsub_subscription_labels ,
15441554 spot = spot ,
15451555 enable_access_logging = enable_access_logging ,
15461556 disable_container_logging = disable_container_logging ,
@@ -1574,6 +1584,8 @@ def _deploy(
15741584 autoscaling_target_cpu_utilization : Optional [int ] = None ,
15751585 autoscaling_target_accelerator_duty_cycle : Optional [int ] = None ,
15761586 autoscaling_target_request_count_per_minute : Optional [int ] = None ,
1587+ autoscaling_target_pubsub_num_undelivered_messages : Optional [int ] = None ,
1588+ autoscaling_pubsub_subscription_labels : Optional [Dict [str , str ]] = None ,
15771589 spot : bool = False ,
15781590 enable_access_logging = False ,
15791591 disable_container_logging : bool = False ,
@@ -1673,6 +1685,12 @@ def _deploy(
16731685 autoscaling_target_request_count_per_minute (int):
16741686 Optional. The target number of requests per minute for autoscaling.
16751687 If set, the model will be scaled based on the number of requests it receives.
1688+ autoscaling_target_pubsub_num_undelivered_messages (int):
1689+ Optional. The target number of pubsub undelivered messages for autoscaling.
1690+ If set, the model will be scaled based on the pubsub queue size.
1691+ autoscaling_pubsub_subscription_labels (Dict[str, str]):
1692+ Optional. Monitored resource labels, as key-value pairs, used to filter the
1693+ pubsub_num_undelivered_messages metric. Only takes effect when the target above is set.
16761694 spot (bool):
16771695 Optional. Whether to schedule the deployment workload on spot VMs.
16781696 enable_access_logging (bool):
@@ -1731,6 +1749,8 @@ def _deploy(
17311749 autoscaling_target_cpu_utilization = autoscaling_target_cpu_utilization ,
17321750 autoscaling_target_accelerator_duty_cycle = autoscaling_target_accelerator_duty_cycle ,
17331751 autoscaling_target_request_count_per_minute = autoscaling_target_request_count_per_minute ,
1752+ autoscaling_target_pubsub_num_undelivered_messages = autoscaling_target_pubsub_num_undelivered_messages ,
1753+ autoscaling_pubsub_subscription_labels = autoscaling_pubsub_subscription_labels ,
17341754 spot = spot ,
17351755 enable_access_logging = enable_access_logging ,
17361756 disable_container_logging = disable_container_logging ,
@@ -1771,6 +1791,8 @@ def _deploy_call(
17711791 autoscaling_target_cpu_utilization : Optional [int ] = None ,
17721792 autoscaling_target_accelerator_duty_cycle : Optional [int ] = None ,
17731793 autoscaling_target_request_count_per_minute : Optional [int ] = None ,
1794+ autoscaling_target_pubsub_num_undelivered_messages : Optional [int ] = None ,
1795+ autoscaling_pubsub_subscription_labels : Optional [Dict [str , str ]] = None ,
17741796 spot : bool = False ,
17751797 enable_access_logging = False ,
17761798 disable_container_logging : bool = False ,
@@ -1876,6 +1898,11 @@ def _deploy_call(
18761898 A default value of 60 will be used if not specified.
18771899 autoscaling_target_request_count_per_minute (int):
18781900 Optional. Target request count per minute per instance.
1901+ autoscaling_target_pubsub_num_undelivered_messages (int):
1902+ Optional. Target pubsub queue size per instance.
1903+ autoscaling_pubsub_subscription_labels (Dict[str, str]):
1904+ Optional. Monitored resource labels, as key-value pairs, used to filter the
1905+ pubsub_num_undelivered_messages metric. Only takes effect when the target above is set.
18791906 spot (bool):
18801907 Optional. Whether to schedule the deployment workload on spot VMs.
18811908 enable_access_logging (bool):
@@ -1946,6 +1973,8 @@ def _deploy_call(
19461973 or autoscaling_target_accelerator_duty_cycle
19471974 or autoscaling_target_cpu_utilization
19481975 or autoscaling_target_request_count_per_minute
1976+ or autoscaling_target_pubsub_num_undelivered_messages
1977+ or autoscaling_pubsub_subscription_labels
19491978 )
19501979
19511980 if provided_custom_machine_spec :
@@ -1954,7 +1983,9 @@ def _deploy_call(
19541983 "The machine_type, accelerator_type and accelerator_count, "
19551984 "autoscaling_target_accelerator_duty_cycle, "
19561985 "autoscaling_target_cpu_utilization, "
1957- "autoscaling_target_request_count_per_minute parameters "
1986+ "autoscaling_target_request_count_per_minute, "
1987+ "autoscaling_target_pubsub_num_undelivered_messages, "
1988+ "autoscaling_pubsub_subscription_labels parameters "
19581989 "may not be set when `deployment_resource_pool` is "
19591990 "specified."
19601991 )
@@ -2008,6 +2039,8 @@ def _deploy_call(
20082039 or autoscaling_target_accelerator_duty_cycle
20092040 or autoscaling_target_cpu_utilization
20102041 or autoscaling_target_request_count_per_minute
2042+ or autoscaling_target_pubsub_num_undelivered_messages
2043+ or autoscaling_pubsub_subscription_labels
20112044 )
20122045
20132046 # If the model supports both automatic and dedicated deployment resources,
@@ -2022,7 +2055,9 @@ def _deploy_call(
20222055 "The machine_type, accelerator_type and accelerator_count, "
20232056 "autoscaling_target_accelerator_duty_cycle, "
20242057 "autoscaling_target_cpu_utilization, "
2025- "autoscaling_target_request_count_per_minute parameters "
2058+ "autoscaling_target_request_count_per_minute, "
2059+ "autoscaling_target_pubsub_num_undelivered_messages, "
2060+ "autoscaling_pubsub_subscription_labels parameters "
20262061 "are ignored."
20272062 )
20282063
@@ -2079,6 +2114,19 @@ def _deploy_call(
20792114 [autoscaling_metric_spec ]
20802115 )
20812116
2117+ if autoscaling_target_pubsub_num_undelivered_messages :
2118+ autoscaling_metric_spec = gca_machine_resources_compat .AutoscalingMetricSpec (
2119+ metric_name = (
2120+ "pubsub.googleapis.com/subscription/"
2121+ "num_undelivered_messages"
2122+ ),
2123+ target = autoscaling_target_pubsub_num_undelivered_messages ,
2124+ monitored_resource_labels = autoscaling_pubsub_subscription_labels ,
2125+ )
2126+ dedicated_resources .autoscaling_metric_specs .extend (
2127+ [autoscaling_metric_spec ]
2128+ )
2129+
20822130 if reservation_affinity_type :
20832131 machine_spec .reservation_affinity = utils .get_reservation_affinity (
20842132 reservation_affinity_type ,
@@ -4399,6 +4447,8 @@ def deploy(
43994447 autoscaling_target_cpu_utilization : Optional [int ] = None ,
44004448 autoscaling_target_accelerator_duty_cycle : Optional [int ] = None ,
44014449 autoscaling_target_request_count_per_minute : Optional [int ] = None ,
4450+ autoscaling_target_pubsub_num_undelivered_messages : Optional [int ] = None ,
4451+ autoscaling_pubsub_subscription_labels : Optional [Dict [str , str ]] = None ,
44024452 ) -> None :
44034453 """Deploys a Model to the PrivateEndpoint.
44044454
@@ -4575,6 +4625,8 @@ def deploy(
45754625 autoscaling_target_cpu_utilization = autoscaling_target_cpu_utilization ,
45764626 autoscaling_target_accelerator_duty_cycle = autoscaling_target_accelerator_duty_cycle ,
45774627 autoscaling_target_request_count_per_minute = autoscaling_target_request_count_per_minute ,
4628+ autoscaling_target_pubsub_num_undelivered_messages = autoscaling_target_pubsub_num_undelivered_messages ,
4629+ autoscaling_pubsub_subscription_labels = autoscaling_pubsub_subscription_labels ,
45784630 )
45794631
45804632 def update (
@@ -5647,6 +5699,8 @@ def deploy(
56475699 autoscaling_target_cpu_utilization : Optional [int ] = None ,
56485700 autoscaling_target_accelerator_duty_cycle : Optional [int ] = None ,
56495701 autoscaling_target_request_count_per_minute : Optional [int ] = None ,
5702+ autoscaling_target_pubsub_num_undelivered_messages : Optional [int ] = None ,
5703+ autoscaling_pubsub_subscription_labels : Optional [Dict [str , str ]] = None ,
56505704 enable_access_logging = False ,
56515705 disable_container_logging : bool = False ,
56525706 private_service_connect_config : Optional [
@@ -5765,6 +5819,12 @@ def deploy(
57655819 autoscaling_target_request_count_per_minute (int):
57665820 Optional. The target number of requests per minute for autoscaling.
57675821 If set, the model will be scaled based on the number of requests it receives.
5822+ autoscaling_target_pubsub_num_undelivered_messages (int):
5823+ Optional. The target number of pubsub undelivered messages for autoscaling.
5824+ If set, the model will be scaled based on the pubsub queue size.
5825+ autoscaling_pubsub_subscription_labels (Dict[str, str]):
5826+ Optional. Monitored resource labels, as key-value pairs, used to filter the
5827+ pubsub_num_undelivered_messages metric. Only takes effect when the target above is set.
57685828 enable_access_logging (bool):
57695829 Whether to enable endpoint access logging. Defaults to False.
57705830 disable_container_logging (bool):
@@ -5818,8 +5878,12 @@ def deploy(
58185878 autoscaling_target_request_count_per_minute (int):
58195879 Optional. The target number of requests per minute for autoscaling.
58205880 If set, the model will be scaled based on the number of requests it receives.
5821- available_replica_count reaches required_replica_count, and the
5822- rest of the replicas will be retried.
5881+ autoscaling_target_pubsub_num_undelivered_messages (int):
5882+ Optional. The target number of pubsub undelivered messages for autoscaling.
5883+ If set, the model will be scaled based on the pubsub queue size.
5884+ autoscaling_pubsub_subscription_labels (Dict[str, str]):
5885+ Optional. Monitored resource labels, as key-value pairs, used to filter the
5886+ pubsub_num_undelivered_messages metric. Only takes effect when the target above is set.
58235887
58245888 Returns:
58255889 endpoint (Union[Endpoint, PrivateEndpoint]):
@@ -5885,6 +5949,8 @@ def deploy(
58855949 autoscaling_target_cpu_utilization = autoscaling_target_cpu_utilization ,
58865950 autoscaling_target_accelerator_duty_cycle = autoscaling_target_accelerator_duty_cycle ,
58875951 autoscaling_target_request_count_per_minute = autoscaling_target_request_count_per_minute ,
5952+ autoscaling_target_pubsub_num_undelivered_messages = autoscaling_target_pubsub_num_undelivered_messages ,
5953+ autoscaling_pubsub_subscription_labels = autoscaling_pubsub_subscription_labels ,
58885954 spot = spot ,
58895955 enable_access_logging = enable_access_logging ,
58905956 disable_container_logging = disable_container_logging ,
@@ -5928,6 +5994,8 @@ def _deploy(
59285994 autoscaling_target_cpu_utilization : Optional [int ] = None ,
59295995 autoscaling_target_accelerator_duty_cycle : Optional [int ] = None ,
59305996 autoscaling_target_request_count_per_minute : Optional [int ] = None ,
5997+ autoscaling_target_pubsub_num_undelivered_messages : Optional [int ] = None ,
5998+ autoscaling_pubsub_subscription_labels : Optional [Dict [str , str ]] = None ,
59315999 spot : bool = False ,
59326000 enable_access_logging = False ,
59336001 disable_container_logging : bool = False ,
@@ -6048,6 +6116,12 @@ def _deploy(
60486116 autoscaling_target_request_count_per_minute (int):
60496117 Optional. The target number of requests per minute for autoscaling.
60506118 If set, the model will be scaled based on the number of requests it receives.
6119+ autoscaling_target_pubsub_num_undelivered_messages (int):
6120+ Optional. The target number of pubsub undelivered messages for autoscaling.
6121+ If set, the model will be scaled based on the pubsub queue size.
6122+ autoscaling_pubsub_subscription_labels (Dict[str, str]):
6123+ Optional. Monitored resource labels, as key-value pairs, used to filter the
6124+ pubsub_num_undelivered_messages metric. Only takes effect when the target above is set.
60516125 spot (bool):
60526126 Optional. Whether to schedule the deployment workload on spot VMs.
60536127 enable_access_logging (bool):
@@ -6137,6 +6211,8 @@ def _deploy(
61376211 autoscaling_target_cpu_utilization = autoscaling_target_cpu_utilization ,
61386212 autoscaling_target_accelerator_duty_cycle = autoscaling_target_accelerator_duty_cycle ,
61396213 autoscaling_target_request_count_per_minute = autoscaling_target_request_count_per_minute ,
6214+ autoscaling_target_pubsub_num_undelivered_messages = autoscaling_target_pubsub_num_undelivered_messages ,
6215+ autoscaling_pubsub_subscription_labels = autoscaling_pubsub_subscription_labels ,
61406216 spot = spot ,
61416217 enable_access_logging = enable_access_logging ,
61426218 disable_container_logging = disable_container_logging ,
0 commit comments