Enable Istio as a provider and add a configurable DestinationRule

Gregory-Pereira · Gregory-Pereira · commit 59f07420334e · 2025-08-14T11:16:15.000-07:00
Signed-off-by: greg pereira &lt;grpereir@redhat.com&gt;
diff --git a/config/charts/inferencepool/README.md b/config/charts/inferencepool/README.md
@@ -16,7 +16,7 @@ To install via the latest published chart in staging  (--version v0 indicates la
 ```txt
 $ helm install vllm-llama3-8b-instruct \
   --set inferencePool.modelServers.matchLabels.app=vllm-llama3-8b-instruct \
-  --set provider.name=[none|gke] \
+  --set provider.name=[none|gke|istio] \
   oci://us-central1-docker.pkg.dev/k8s-staging-images/gateway-api-inference-extension/charts/inferencepool --version v0
 ```
 
@@ -75,7 +75,7 @@ Use `--set inferencePool.modelServerType=triton-tensorrt-llm` to install for Tri
 $ helm install triton-llama3-8b-instruct \
   --set inferencePool.modelServers.matchLabels.app=triton-llama3-8b-instruct \
   --set inferencePool.modelServerType=triton-tensorrt-llm \
-  --set provider.name=[none|gke] \
+  --set provider.name=[none|gke|istio] \
   oci://us-central1-docker.pkg.dev/k8s-staging-images/gateway-api-inference-extension/charts/inferencepool --version v0
 ```
 
@@ -124,7 +124,7 @@ The following table list the configurable parameters of the chart.
 | `inferenceExtension.extraContainerPorts`    | List of additional container ports to expose. Defaults to `[]`.                                                       |
 | `inferenceExtension.extraServicePorts`      | List of additional service ports to expose. Defaults to `[]`.                                                         |
 | `inferenceExtension.logVerbosity`           | Logging verbosity level for the endpoint picker. Defaults to `"3"`.                                                   |
-| `provider.name`                             | Name of the Inference Gateway implementation being used. Possible values: `gke`. Defaults to `none`.                   |
+| `provider.name`                             | Name of the Inference Gateway implementation being used. Possible values: [`gke`, `none`, `istio`]. Defaults to `none`.                   |
 | `inferenceExtension.enableLeaderElection`   | Enable leader election for high availability. When enabled, only one EPP pod (the leader) will be ready to serve traffic. It is recommended to set `inferenceExtension.replicas` to a value greater than 1 when this is set to `true`. Defaults to `false`. |
 
 
diff --git a/config/charts/inferencepool/templates/istio.yaml b/config/charts/inferencepool/templates/istio.yaml
@@ -0,0 +1,32 @@
+{{- /*
+Istio DestinationRule. Rendered only when provider.name is "istio" and
+istio.destinationRule.enabled is true; otherwise this template emits nothing.
+*/ -}}
+{{- if eq .Values.provider.name "istio" }}
+{{- if .Values.istio.destinationRule.enabled }}
+---
+apiVersion: networking.istio.io/v1beta1
+kind: DestinationRule
+metadata:
+  name: {{ include "gateway-api-inference-extension.name" . }}
+spec:
+  {{- /* host falls back to <name helper>.<release namespace>.svc.cluster.local; quoted so a wildcard host ("*.ns.svc.cluster.local") is not read as a YAML alias. */}}
+  host: {{ .Values.istio.destinationRule.host | default (printf "%s.%s.svc.cluster.local" (include "gateway-api-inference-extension.name" .) .Release.Namespace) | quote }}
+  {{- with .Values.istio.destinationRule.trafficPolicy }}
+  trafficPolicy:
+    {{- toYaml . | nindent 4 }}
+  {{- end }}
+  {{- with .Values.istio.destinationRule.subsets }}
+  subsets:
+    {{- toYaml . | nindent 4 }}
+  {{- end }}
+  {{- with .Values.istio.destinationRule.exportTo }}
+  exportTo:
+    {{- toYaml . | nindent 4 }}
+  {{- end }}
+  {{- with .Values.istio.destinationRule.workloadSelector }}
+  workloadSelector:
+    {{- toYaml . | nindent 4 }}
+  {{- end }}
+{{- end }}
+{{- end }}
diff --git a/config/charts/inferencepool/values.yaml b/config/charts/inferencepool/values.yaml
@@ -45,10 +45,26 @@ inferencePool:
     # matchLabels: 
     #   app: vllm-llama3-8b-instruct
 
+# Options: ["gke", "istio", "none"]
 provider:
   name: none
 
 gke:
   monitoringSecret:
     name: inference-gateway-sa-metrics-reader-secret
     namespace: default
+
+istio:  # Read by templates/istio.yaml, which renders only when provider.name is "istio"
+  destinationRule:
+    enabled: true  # Set to false to skip rendering the DestinationRule
+    # Overrides the default calculated host (<name>.<release namespace>.svc.cluster.local)
+    host: ""
+    # Optional: Traffic policy applied to the host above; omitted from the manifest when empty
+    trafficPolicy: {}
+    # Optional: Define subsets for versioned routing (e.g., by labels)
+    subsets: []
+    # Optional: Namespaces this DestinationRule is exported to; omitted from the manifest when empty
+    exportTo: []
+    # Optional: Apply only to specific workloads (via selector labels)
+    workloadSelector: {}
+