Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
22 changes: 17 additions & 5 deletions .buildkite/pipeline.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,10 @@ _templates:
if: build.env("STAGED_BINARIES") == null && build.branch != "master"
source_test_continuous: &source_test_continuous
if: build.env("STAGED_BINARIES") == null && build.branch == "master"
# Condition shared by GPU test steps: never run for staged-binary builds
# (STAGED_BINARIES set); otherwise run unless SKIP_GPU_TESTS is set, in which
# case run only when the build message matches nvidia, nvproxy or gpu
# (case-insensitive).
gpu_test: &gpu_test
if: build.env("STAGED_BINARIES") == null && ( build.env("SKIP_GPU_TESTS") == null || build.message =~ /nvidia|nvproxy|gpu/i )
# NOTE(review): this condition is currently identical to gpu_test. Unlike
# source_test_continuous it does not restrict on build.branch == "master";
# confirm that is intended.
gpu_test_continuous: &gpu_test_continuous
if: build.env("STAGED_BINARIES") == null && ( build.env("SKIP_GPU_TESTS") == null || build.message =~ /nvidia|nvproxy|gpu/i )
platform_specific_agents: &platform_specific_agents {}
kvm_agents: &kvm_agents {kvm: "true"}
ubuntu_agents: &ubuntu_agents {os: "ubuntu"}
Expand Down Expand Up @@ -184,22 +188,30 @@ steps:
commands:
- tools/gpu/cos_drivers_test.sh
- <<: *common
<<: *gpu_test
label: ":screwdriver: GPU Tests"
if: build.env("STAGED_BINARIES") == null && ( build.env("SKIP_GPU_TESTS") == null || build.message =~ /nvidia|nvproxy|gpu/i )
commands:
- make sudo TARGETS=//tools/gpu:main ARGS="install --latest" || cat /var/log/nvidia-installer.log
- make gpu-all-tests
agents:
queue: gpu
# L4 GPU tests: installs the latest GPU driver via //tools/gpu:main, then runs
# `make l4-gpu-tests` on agents in the l4-gpu queue.
# NOTE(review): `install ... || cat <log>` exits with cat's status, so a failed
# driver install that leaves a readable log does not fail this command and the
# step proceeds to the tests — confirm this best-effort behavior is intended.
- <<: *common
<<: *gpu_test_continuous
label: ":chainsaw: L4 GPU Tests"
commands:
- make sudo TARGETS=//tools/gpu:main ARGS="install --latest" || cat /var/log/nvidia-installer.log
- make l4-gpu-tests
agents:
queue: l4-gpu
- <<: *common
<<: *gpu_test
label: ":female_supervillain: COS GPU Tests"
if: build.env("STAGED_BINARIES") == null && ( build.env("SKIP_GPU_TESTS") == null || build.message =~ /nvidia|nvproxy|gpu/i )
commands:
- make cos-gpu-all-tests
agents:
queue: cos-canary-gpu
- label: ":fish: CUDA tests"
if: build.env("STAGED_BINARIES") == null && ( build.env("SKIP_GPU_TESTS") == null || build.message =~ /nvidia|nvproxy|gpu/i )
- <<: *gpu_test
label: ":fish: CUDA tests"
# This is its own test rather than being part of the GPU tests,
# because it takes around 30 minutes to run.
parallelism: 8
Expand All @@ -212,8 +224,8 @@ steps:
agents:
queue: gpu
- <<: *common
<<: *gpu_test
label: ":screwdriver: All GPU Drivers Test"
if: build.env("STAGED_BINARIES") == null && ( build.env("SKIP_GPU_TESTS") == null || build.message =~ /nvidia|nvproxy|gpu/i )
parallelism: 8
commands:
- tools/gpu/all_drivers_test.sh
Expand Down
5 changes: 5 additions & 0 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -313,6 +313,11 @@ cos-gpu-smoke-tests: gpu-smoke-images $(RUNTIME_BIN)
gpu-images: gpu-smoke-images load-gpu_pytorch load-gpu_ollama load-gpu_ollama_client load-basic_busybox load-basic_alpine load-basic_python load-gpu_stable-diffusion-xl load-gpu_vllm load-gpu_nccl-tests load-benchmarks_ffmpeg
.PHONY: gpu-images

# l4-gpu-tests runs the SGLang GPU test (test/gpu:sglang_test) under $(RUNTIME).
# Prerequisites: the load-gpu_sglang and load-gpu_sglang_client targets and the
# runtime binary. The runtime is (re)installed with nvproxy enabled and all
# driver capabilities allowed before the test runs. Extra test flags can be
# passed through ARGS.
l4-gpu-tests: load-gpu_sglang load-gpu_sglang_client $(RUNTIME_BIN)
@$(call install_runtime,$(RUNTIME),--nvproxy=true --nvproxy-docker=true --nvproxy-allowed-driver-capabilities=all)
@$(call sudo,test/gpu:sglang_test,--runtime=$(RUNTIME) -test.v $(ARGS))
.PHONY: l4-gpu-tests

gpu-all-tests: gpu-images gpu-smoke-tests $(RUNTIME_BIN)
@$(call install_runtime,$(RUNTIME),--nvproxy=true --nvproxy-docker=true --nvproxy-allowed-driver-capabilities=all)
@$(call sudo,test/gpu:pytorch_test,--runtime=$(RUNTIME) -test.v $(ARGS))
Expand Down
4 changes: 4 additions & 0 deletions images/gpu/ollama/client/client.go
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,8 @@ import (
"time"
)

// LINT.IfChange

// Flags.
var (
url = flag.String("url", "", "HTTP request URL.")
Expand Down Expand Up @@ -150,3 +152,5 @@ func main() {
}
fmt.Fprintf(os.Stderr, "STATS: %s\n", string(metricsBytes))
}

// LINT.ThenChange(../../sglang/client/client.go)
11 changes: 11 additions & 0 deletions images/gpu/sglang/Dockerfile.x86_64
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
# Stage 1 ("downloader"): clone the qwen2.5-0.5b-instruct model repository from
# Hugging Face into /qwen2.5-0.5b-instruct.
FROM alpine/git@sha256:4d7fe8d770483993c0cec264d49a573bac49e5239db47a9846572352e72da49c AS downloader
# The clone triggers a post-checkout hook that checks that the `git lfs`
# command is available, so git-lfs is installed and initialized first.
# NOTE(review): GIT_CLONE_PROTECTION_ACTIVE=false presumably allows that hook
# to run during the clone — confirm against the git version in the base image.
RUN apk add git-lfs && \
git lfs install && \
GIT_CLONE_PROTECTION_ACTIVE=false git clone https://huggingface.co/qwen/qwen2.5-0.5b-instruct /qwen2.5-0.5b-instruct

# Stage 2: the SGLang image (pinned by digest) with the downloaded model copied
# in at the same path.
FROM lmsysorg/sglang@sha256:119cf3a894b380a78d81e1557c8cc58ccc234b4854232e0e1dbf39916a4c7e75
COPY --from=downloader /qwen2.5-0.5b-instruct /qwen2.5-0.5b-instruct

# Default command: launch the SGLang server on CUDA, serving the bundled model
# on 0.0.0.0:30000 with a fixed random seed of 42.
ENTRYPOINT ["python3"]
CMD ["-m", "sglang.launch_server", "--device", "cuda", "--model", "/qwen2.5-0.5b-instruct", "--host", "0.0.0.0", "--port", "30000", "--random-seed", "42"]
11 changes: 11 additions & 0 deletions images/gpu/sglang/client/BUILD
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
load("//tools:defs.bzl", "go_binary")

package(
default_applicable_licenses = ["//:license"],
licenses = ["notice"],
)

# Builds client.go, the curl-like HTTP client used to talk to SGLang, as a
# standalone Go binary.
go_binary(
name = "client",
srcs = ["client.go"],
)
8 changes: 8 additions & 0 deletions images/gpu/sglang/client/Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
# Build stage: compile client.go with cgo disabled into /httpclient.
FROM golang:1.22 AS builder

COPY client.go /client.go
RUN CGO_ENABLED=0 go build -o /httpclient /client.go

# Runtime stage: Alpine image containing only the compiled client, which is
# run by default.
FROM alpine:latest
COPY --from=builder /httpclient /usr/bin/
CMD ["/usr/bin/httpclient"]
155 changes: 155 additions & 0 deletions images/gpu/sglang/client/client.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,155 @@
// Copyright 2024 The gVisor Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

// A simple `curl`-like HTTP client that prints metrics after the request.
// All of its output is structured to be unambiguous even if stdout/stderr
// is combined, as is the case for Kubernetes logs.
// Useful for communicating with SGLang.
package main

import (
"bufio"
"bytes"
"encoding/base64"
"encoding/json"
"flag"
"fmt"
"net/http"
"os"
"sort"
"strings"
"time"
)

// LINT.IfChange

// Command-line flags describing the single HTTP request that main performs.
// --url is mandatory; main exits with an error when it is empty.
var (
url = flag.String("url", "", "HTTP request URL.")
method = flag.String("method", "GET", "HTTP request method (GET or POST).")
postDataBase64 = flag.String("post_base64", "", "HTTP request POST data in base64 format; ignored for GET requests.")
timeout = flag.Duration("timeout", 0, "HTTP request timeout; 0 for no timeout.")
)

// bufSize is the size, in bytes, of the buffers used when sending HTTP
// requests and reading HTTP responses.
const bufSize = 1024 * 1024 // 1MiB

// fatalf writes a printf-style message to stderr, wrapped in a "FATAL: "
// prefix and a trailing newline, and then terminates the process with exit
// status 1. It never returns.
func fatalf(format string, values ...any) {
	const (
		prefix = "FATAL: "
		suffix = "\n"
	)
	message := prefix + format + suffix
	fmt.Fprintf(os.Stderr, message, values...)
	os.Exit(1)
}

// Metrics contains the request metrics to export to JSON.
// main fills in each timestamp as the request progresses and prints the
// JSON encoding on stderr as a single "STATS: " line.
// This is parsed by the sglang library at `test/gpu/sglang/sglang.go`.
type Metrics struct {
// ProgramStarted is the time when the program started (taken at the top of
// main, before flag parsing).
ProgramStarted time.Time `json:"program_started"`
// RequestSent is the time when the HTTP request was sent (taken immediately
// before the request is issued).
RequestSent time.Time `json:"request_sent"`
// ResponseReceived is the time when the HTTP response headers were received
// (taken immediately after the request call returns).
ResponseReceived time.Time `json:"response_received"`
// FirstByteRead is the time when the first HTTP response body byte was read.
// main records it once the first line of the body has been scanned, so it
// stays at the zero time if the body yields no lines.
FirstByteRead time.Time `json:"first_byte_read"`
// LastByteRead is the time when the last HTTP response body byte was read
// (taken after the body has been scanned to the end without error).
LastByteRead time.Time `json:"last_byte_read"`
}

// main performs the single HTTP request described by the command-line flags
// and reports the outcome as prefixed lines, so the output stays unambiguous
// even when stdout and stderr are interleaved:
//
//   - "REQHEADER: k: v" (stderr): request headers, sorted by header name.
//   - "BODY: <quoted line>" (stdout): one line per non-empty response body
//     line, Go-quoted, with a leading "data: " prefix removed.
//   - "RESPHEADER: k: v" (stderr): response headers, sorted by header name.
//   - "STATS: <json>" (stderr): the JSON-encoded Metrics.
//
// Every failure goes through fatalf, which exits with status 1.
func main() {
	var metrics Metrics
	metrics.ProgramStarted = time.Now()
	flag.Parse()
	if *url == "" {
		fatalf("--url is required")
	}
	client := http.Client{
		Transport: &http.Transport{
			MaxIdleConns:    1,
			IdleConnTimeout: *timeout,
			ReadBufferSize:  bufSize,
			WriteBufferSize: bufSize,
		},
		Timeout: *timeout,
	}
	var request *http.Request
	var err error
	switch *method {
	case "GET":
		request, err = http.NewRequest("GET", *url, nil)
	case "POST":
		postData, postDataErr := base64.StdEncoding.DecodeString(*postDataBase64)
		if postDataErr != nil {
			fatalf("cannot decode POST data: %v", postDataErr)
		}
		request, err = http.NewRequest("POST", *url, bytes.NewBuffer(postData))
	default:
		err = fmt.Errorf("unknown method %q", *method)
	}
	if err != nil {
		fatalf("cannot create request: %v", err)
	}
	// Print headers in sorted order so the output is deterministic.
	orderedReqHeaders := make([]string, 0, len(request.Header))
	for k := range request.Header {
		orderedReqHeaders = append(orderedReqHeaders, k)
	}
	sort.Strings(orderedReqHeaders)
	for _, k := range orderedReqHeaders {
		for _, v := range request.Header[k] {
			fmt.Fprintf(os.Stderr, "REQHEADER: %s: %s\n", k, v)
		}
	}
	metrics.RequestSent = time.Now()
	resp, err := client.Do(request)
	metrics.ResponseReceived = time.Now()
	if err != nil {
		fatalf("cannot make request: %v", err)
	}
	gotFirstByte := false
	scanner := bufio.NewScanner(resp.Body)
	// By default a Scanner fails with bufio.ErrTooLong on any line longer than
	// bufio.MaxScanTokenSize (64KiB). A non-streaming response can arrive as a
	// single long line, so allow lines up to bufSize while still starting with
	// the default-sized buffer.
	scanner.Buffer(make([]byte, 0, bufio.MaxScanTokenSize), bufSize)
	for scanner.Scan() {
		if !gotFirstByte {
			metrics.FirstByteRead = time.Now()
			gotFirstByte = true
		}
		line := scanner.Text()
		if line == "" {
			// Blank lines carry no payload; skip them.
			continue
		}
		fmt.Printf("BODY: %q\n", strings.TrimPrefix(line, "data: "))
	}
	// Check for any errors that may have occurred during scanning.
	if err := scanner.Err(); err != nil {
		fatalf("error reading response body: %v", err)
	}
	metrics.LastByteRead = time.Now()
	if err := resp.Body.Close(); err != nil {
		fatalf("cannot close response body: %v", err)
	}
	orderedRespHeaders := make([]string, 0, len(resp.Header))
	for k := range resp.Header {
		orderedRespHeaders = append(orderedRespHeaders, k)
	}
	sort.Strings(orderedRespHeaders)
	for _, k := range orderedRespHeaders {
		for _, v := range resp.Header[k] {
			fmt.Fprintf(os.Stderr, "RESPHEADER: %s: %s\n", k, v)
		}
	}
	metricsBytes, err := json.Marshal(&metrics)
	if err != nil {
		fatalf("cannot marshal metrics: %v", err)
	}
	fmt.Fprintf(os.Stderr, "STATS: %s\n", string(metricsBytes))
}

// LINT.ThenChange(../../ollama/client/client.go)
18 changes: 18 additions & 0 deletions test/gpu/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,24 @@ go_test(
],
)

# SGLang GPU test. Invoked by the `l4-gpu-tests` Makefile target.
go_test(
name = "sglang_test",
srcs = ["sglang_test.go"],
# runsc is needed to invalidate the bazel cache in case of any code changes.
data = ["//runsc"],
tags = [
"manual",
"noguitar",
"notap",
],
visibility = ["//:sandbox"],
deps = [
"//pkg/test/dockerutil",
"//pkg/test/testutil",
"//test/gpu/sglang",
],
)

go_test(
name = "sr_test",
srcs = ["sr_test.go"],
Expand Down
18 changes: 18 additions & 0 deletions test/gpu/sglang/BUILD
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
load("//tools:defs.bzl", "go_library")

package(
default_applicable_licenses = ["//:license"],
licenses = ["notice"],
)

# Test-only Go library for the SGLang GPU tests; a dependency of
# //test/gpu:sglang_test.
go_library(
name = "sglang",
testonly = 1,
srcs = ["sglang.go"],
stateify = False, # Does not support some generics methods.
visibility = ["//:sandbox"],
deps = [
"//pkg/test/dockerutil",
"//pkg/test/testutil",
],
)
Loading