Skip to content

fix: port forward for local interactive tests #834

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 5 additions & 1 deletion .github/workflows/e2e_tests.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -104,8 +104,12 @@ jobs:
kubectl create clusterrolebinding sdk-user-localqueue-creator --clusterrole=localqueue-creator --user=sdk-user
kubectl create clusterrole list-secrets --verb=get,list --resource=secrets
kubectl create clusterrolebinding sdk-user-list-secrets --clusterrole=list-secrets --user=sdk-user
kubectl create clusterrole pod-creator --verb=get,list --resource=pods
kubectl create clusterrole pod-creator --verb=get,list,watch --resource=pods
kubectl create clusterrolebinding sdk-user-pod-creator --clusterrole=pod-creator --user=sdk-user
kubectl create clusterrole service-reader --verb=get,list,watch --resource=services
kubectl create clusterrolebinding sdk-user-service-reader --clusterrole=service-reader --user=sdk-user
kubectl create clusterrole port-forward-pods --verb=create --resource=pods/portforward
kubectl create clusterrolebinding sdk-user-port-forward-pods-binding --clusterrole=port-forward-pods --user=sdk-user
kubectl config use-context sdk-user

- name: Run e2e tests
Expand Down
59 changes: 46 additions & 13 deletions tests/e2e/local_interactive_sdk_kind_test.py
Original file line number Diff line number Diff line change
@@ -1,23 +1,31 @@
from codeflare_sdk import (
Cluster,
ClusterConfiguration,
TokenAuthentication,
generate_cert,
)

import pytest
import ray
import math
import subprocess

from support import *


@pytest.mark.kind
class TestRayLocalInteractiveOauth:
class TestRayLocalInteractiveKind:
def setup_method(self):
    """Prepare per-test state before each test method runs.

    Calls ``initialize_kubernetes_client`` on this test instance and then
    resets the port-forward process handle.
    """
    initialize_kubernetes_client(self)
    # No port-forward subprocess exists yet; ``cleanup_port_forward`` treats
    # a falsy handle as "nothing to stop".
    self.port_forward_process = None

def cleanup_port_forward(self):
    """Stop the background port-forward subprocess, if one was started.

    Does nothing when ``self.port_forward_process`` is falsy, so it is safe
    to call more than once (it is invoked both from the test body's
    ``finally`` and from ``teardown_method``).

    The process is asked to terminate and given 10 seconds to exit. If it
    has not exited by then it is force-killed and reaped, so a stuck child
    can neither leak nor make the remaining teardown steps fail.
    """
    process = self.port_forward_process
    if not process:
        return

    # Clear the handle first so a repeated call never touches the same
    # process again, even if shutting it down raises below.
    self.port_forward_process = None

    process.terminate()
    try:
        process.wait(timeout=10)
    except subprocess.TimeoutExpired:
        # The polite terminate was ignored: escalate to kill and reap the
        # child so it does not linger after the test finishes.
        process.kill()
        process.wait()

def teardown_method(self):
    """Release everything a test created, after each test method.

    Stops any port-forward subprocess first, then calls ``delete_namespace``
    and ``delete_kueue_resources`` on this test instance, in that order.
    """
    self.cleanup_port_forward()
    # Cluster-side cleanup helpers (presumably provided by the ``support``
    # star import - confirm there); both take the test instance.
    for release in (delete_namespace, delete_kueue_resources):
        release(self)

Expand All @@ -39,6 +47,8 @@ def run_local_interactives(
):
cluster_name = "test-ray-cluster-li"

ray.shutdown()

cluster = Cluster(
ClusterConfiguration(
name=cluster_name,
Expand All @@ -49,25 +59,24 @@ def run_local_interactives(
head_memory_requests=2,
head_memory_limits=2,
worker_cpu_requests="500m",
worker_cpu_limits=1,
worker_cpu_limits="500m",
worker_memory_requests=1,
worker_memory_limits=4,
worker_extended_resource_requests={gpu_resource_name: number_of_gpus},
write_to_file=True,
verify_tls=False,
)
)

cluster.up()

cluster.wait_ready()
cluster.status()

generate_cert.generate_tls_cert(cluster_name, self.namespace)
generate_cert.export_env(cluster_name, self.namespace)

print(cluster.local_client_url())

ray.shutdown()
ray.init(address=cluster.local_client_url(), logging_level="DEBUG")

@ray.remote(num_gpus=number_of_gpus / 2)
def heavy_calculation_part(num_iterations):
result = 0.0
Expand All @@ -84,10 +93,34 @@ def heavy_calculation(num_iterations):
)
return sum(results)

ref = heavy_calculation.remote(3000)
result = ray.get(ref)
assert result == 1789.4644387076714
ray.cancel(ref)
ray.shutdown()
# Attempt to port forward
try:
local_port = "20001"
ray_client_port = "10001"

port_forward_cmd = [
"kubectl",
"port-forward",
"-n",
self.namespace,
f"svc/{cluster_name}-head-svc",
f"{local_port}:{ray_client_port}",
]
self.port_forward_process = subprocess.Popen(
port_forward_cmd, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL
)

client_url = f"ray://localhost:{local_port}"
cluster.status()

ray.init(address=client_url, logging_level="INFO")

ref = heavy_calculation.remote(3000)
result = ray.get(ref)
assert result == 1789.4644387076714
ray.cancel(ref)
ray.shutdown()

cluster.down()
cluster.down()
finally:
self.cleanup_port_forward()
2 changes: 0 additions & 2 deletions tests/e2e/support.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,9 @@
import json
import os
import random
import string
import subprocess
from codeflare_sdk import get_cluster
from kubernetes import client, config
import kubernetes.client
from codeflare_sdk.common.kubernetes_cluster.kube_api_helpers import (
_kube_api_error_handling,
)
Expand Down
Loading