Skip to content

Commit f11646d

Browse files
AnilAltinay authored and gvisor-bot committed
Add SGLang Docker tests/benchmark
PiperOrigin-RevId: 805509357
1 parent 48fd850 commit f11646d

File tree

9 files changed

+1045
-1
lines changed

9 files changed

+1045
-1
lines changed

Makefile

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -310,13 +310,14 @@ cos-gpu-smoke-tests: gpu-smoke-images $(RUNTIME_BIN)
310310
# This is a superset of those needed for smoke tests.
311311
# It includes non-GPU images that are used as part of GPU tests,
312312
# e.g. busybox and python.
313-
gpu-images: gpu-smoke-images load-gpu_pytorch load-gpu_ollama load-gpu_ollama_client load-basic_busybox load-basic_alpine load-basic_python load-gpu_stable-diffusion-xl load-gpu_vllm load-gpu_nccl-tests load-benchmarks_ffmpeg
313+
gpu-images: gpu-smoke-images load-gpu_pytorch load-gpu_ollama load-gpu_ollama_client load-gpu_sglang load-gpu_sglang_client load-basic_busybox load-basic_alpine load-basic_python load-gpu_stable-diffusion-xl load-gpu_vllm load-gpu_nccl-tests load-benchmarks_ffmpeg
314314
.PHONY: gpu-images
315315

316316
gpu-all-tests: gpu-images gpu-smoke-tests $(RUNTIME_BIN)
317317
@$(call install_runtime,$(RUNTIME),--nvproxy=true --nvproxy-docker=true --nvproxy-allowed-driver-capabilities=all)
318318
@$(call sudo,test/gpu:pytorch_test,--runtime=$(RUNTIME) -test.v $(ARGS))
319319
@$(call sudo,test/gpu:textgen_test,--runtime=$(RUNTIME) -test.v $(ARGS))
320+
@$(call sudo,test/gpu:sglang_test,--runtime=$(RUNTIME) -test.v $(ARGS))
320321
@$(call sudo,test/gpu:imagegen_test,--runtime=$(RUNTIME) -test.v $(ARGS))
321322
@$(call sudo,test/gpu:sr_test,--runtime=$(RUNTIME) -test.v $(ARGS))
322323
@$(call sudo,test/gpu:nccl_test,--runtime=$(RUNTIME) -test.v $(ARGS))
Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
FROM alpine/git@sha256:4d7fe8d770483993c0cec264d49a573bac49e5239db47a9846572352e72da49c AS downloader
2+
# post checkout hook. checks that command git lfs is available.
3+
RUN apk add git-lfs && \
4+
git lfs install && \
5+
GIT_CLONE_PROTECTION_ACTIVE=false git clone https://huggingface.co/qwen/qwen2.5-0.5b-instruct /qwen2.5-0.5b-instruct
6+
7+
FROM lmsysorg/sglang@sha256:119cf3a894b380a78d81e1557c8cc58ccc234b4854232e0e1dbf39916a4c7e75
8+
COPY --from=downloader /qwen2.5-0.5b-instruct /qwen2.5-0.5b-instruct
9+
10+
ENTRYPOINT ["python3"]
11+
# If the model gives inconsistent results, check out https://github.com/sgl-project/sglang/issues/1729
12+
CMD ["-m", "sglang.launch_server", "--device", "cuda", "--model", "/qwen2.5-0.5b-instruct", "--host", "0.0.0.0", "--port", "30000", "--random-seed", "42"]

images/gpu/sglang/client/BUILD

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
load("//tools:defs.bzl", "go_binary")
2+
3+
package(
4+
default_applicable_licenses = ["//:license"],
5+
licenses = ["notice"],
6+
)
7+
8+
go_binary(
9+
name = "client",
10+
srcs = ["client.go"],
11+
)
Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
FROM golang:1.22 AS builder
2+
3+
COPY client.go /client.go
4+
RUN CGO_ENABLED=0 go build -o /httpclient /client.go
5+
6+
FROM alpine:latest
7+
COPY --from=builder /httpclient /usr/bin/
8+
CMD ["/usr/bin/httpclient"]

images/gpu/sglang/client/client.go

Lines changed: 151 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,151 @@
1+
// Copyright 2024 The gVisor Authors.
2+
//
3+
// Licensed under the Apache License, Version 2.0 (the "License");
4+
// you may not use this file except in compliance with the License.
5+
// You may obtain a copy of the License at
6+
//
7+
// http://www.apache.org/licenses/LICENSE-2.0
8+
//
9+
// Unless required by applicable law or agreed to in writing, software
10+
// distributed under the License is distributed on an "AS IS" BASIS,
11+
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
// See the License for the specific language governing permissions and
13+
// limitations under the License.
14+
15+
// A simple `curl`-like HTTP client that prints metrics after the request.
16+
// All of its output is structured to be unambiguous even if stdout/stderr
17+
// is combined, as is the case for Kubernetes logs.
18+
// Useful for communicating with SGLang.
19+
package main
20+
21+
import (
22+
"bufio"
23+
"bytes"
24+
"encoding/base64"
25+
"encoding/json"
26+
"flag"
27+
"fmt"
28+
"net/http"
29+
"os"
30+
"sort"
31+
"strings"
32+
"time"
33+
)
34+
35+
// Command-line flags describing the single HTTP request this client issues.
var (
	// method is the HTTP verb to use; main only handles "GET" and "POST".
	method = flag.String("method", "GET", "HTTP request method (GET or POST).")
	// url is the request target; main refuses to run when it is empty.
	url = flag.String("url", "", "HTTP request URL.")
	// timeout bounds the whole request; the zero value disables the limit.
	timeout = flag.Duration("timeout", 0, "HTTP request timeout; 0 for no timeout.")
	// postDataBase64 carries the POST body, base64-encoded so that arbitrary
	// payloads survive being passed on a command line.
	postDataBase64 = flag.String("post_base64", "", "HTTP request POST data in base64 format; ignored for GET requests.")
)
42+
43+
// bufSize is the size, in bytes, of the buffers used for HTTP requests and
// responses (1 MiB).
const bufSize = 1 << 20
45+
46+
// fatalf writes a "FATAL: "-prefixed, newline-terminated message built from
// format and values to stderr, then terminates the process with exit code 1.
// It never returns.
func fatalf(format string, values ...any) {
	const (
		prefix = "FATAL: "
		suffix = "\n"
	)
	fmt.Fprintf(os.Stderr, prefix+format+suffix, values...)
	os.Exit(1)
}
51+
52+
// Metrics contains the request metrics to export to JSON.
53+
// This is parsed by the sglang library at `test/gpu/sglang/sglang.go`.
54+
type Metrics struct {
55+
// ProgramStarted is the time when the program started.
56+
ProgramStarted time.Time `json:"program_started"`
57+
// RequestSent is the time when the HTTP request was sent.
58+
RequestSent time.Time `json:"request_sent"`
59+
// ResponseReceived is the time when the HTTP response headers were received.
60+
ResponseReceived time.Time `json:"response_received"`
61+
// FirstByteRead is the time when the first HTTP response body byte was read.
62+
FirstByteRead time.Time `json:"first_byte_read"`
63+
// LastByteRead is the time when the last HTTP response body byte was read.
64+
LastByteRead time.Time `json:"last_byte_read"`
65+
}
66+
67+
func main() {
68+
var metrics Metrics
69+
metrics.ProgramStarted = time.Now()
70+
flag.Parse()
71+
if *url == "" {
72+
fatalf("--url is required")
73+
}
74+
client := http.Client{
75+
Transport: &http.Transport{
76+
MaxIdleConns: 1,
77+
IdleConnTimeout: *timeout,
78+
ReadBufferSize: bufSize,
79+
WriteBufferSize: bufSize,
80+
},
81+
Timeout: *timeout,
82+
}
83+
var request *http.Request
84+
var err error
85+
switch *method {
86+
case "GET":
87+
request, err = http.NewRequest("GET", *url, nil)
88+
case "POST":
89+
postData, postDataErr := base64.StdEncoding.DecodeString(*postDataBase64)
90+
if postDataErr != nil {
91+
fatalf("cannot decode POST data: %v", postDataErr)
92+
}
93+
request, err = http.NewRequest("POST", *url, bytes.NewBuffer(postData))
94+
default:
95+
err = fmt.Errorf("unknown method %q", *method)
96+
}
97+
if err != nil {
98+
fatalf("cannot create request: %v", err)
99+
}
100+
orderedReqHeaders := make([]string, 0, len(request.Header))
101+
for k := range request.Header {
102+
orderedReqHeaders = append(orderedReqHeaders, k)
103+
}
104+
sort.Strings(orderedReqHeaders)
105+
for _, k := range orderedReqHeaders {
106+
for _, v := range request.Header[k] {
107+
fmt.Fprintf(os.Stderr, "REQHEADER: %s: %s\n", k, v)
108+
}
109+
}
110+
metrics.RequestSent = time.Now()
111+
resp, err := client.Do(request)
112+
metrics.ResponseReceived = time.Now()
113+
if err != nil {
114+
fatalf("cannot make request: %v", err)
115+
}
116+
gotFirstByte := false
117+
scanner := bufio.NewScanner(resp.Body)
118+
for scanner.Scan() {
119+
if !gotFirstByte {
120+
metrics.FirstByteRead = time.Now()
121+
gotFirstByte = true
122+
}
123+
if scanner.Text() == "" {
124+
continue
125+
}
126+
fmt.Printf("BODY: %q\n", strings.TrimPrefix(scanner.Text(), "data: "))
127+
}
128+
// Check for any errors that may have occurred during scanning
129+
if err := scanner.Err(); err != nil {
130+
fatalf("error reading response body: %v", err)
131+
}
132+
metrics.LastByteRead = time.Now()
133+
if err := resp.Body.Close(); err != nil {
134+
fatalf("cannot close response body: %v", err)
135+
}
136+
orderedRespHeaders := make([]string, 0, len(resp.Header))
137+
for k := range resp.Header {
138+
orderedRespHeaders = append(orderedRespHeaders, k)
139+
}
140+
sort.Strings(orderedRespHeaders)
141+
for _, k := range orderedRespHeaders {
142+
for _, v := range resp.Header[k] {
143+
fmt.Fprintf(os.Stderr, "RESPHEADER: %s: %s\n", k, v)
144+
}
145+
}
146+
metricsBytes, err := json.Marshal(&metrics)
147+
if err != nil {
148+
fatalf("cannot marshal metrics: %v", err)
149+
}
150+
fmt.Fprintf(os.Stderr, "STATS: %s\n", string(metricsBytes))
151+
}

test/gpu/BUILD

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -52,6 +52,25 @@ go_test(
5252
],
5353
)
5454

55+
go_test(
56+
name = "sglang_test",
57+
srcs = ["sglang_test.go"],
58+
# runsc is needed to invalidate the bazel cache in case of any code changes.
59+
data = ["//runsc"],
60+
embedsrcs = ["gvisor.png"],
61+
tags = [
62+
"manual",
63+
"noguitar",
64+
"notap",
65+
],
66+
visibility = ["//:sandbox"],
67+
deps = [
68+
"//pkg/test/dockerutil",
69+
"//pkg/test/testutil",
70+
"//test/gpu/sglang",
71+
],
72+
)
73+
5574
go_test(
5675
name = "sr_test",
5776
srcs = ["sr_test.go"],

test/gpu/sglang/BUILD

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
load("//tools:defs.bzl", "go_library")
2+
3+
package(
4+
default_applicable_licenses = ["//:license"],
5+
licenses = ["notice"],
6+
)
7+
8+
go_library(
9+
name = "sglang",
10+
testonly = 1,
11+
srcs = ["sglang.go"],
12+
stateify = False, # Does not support some generics methods.
13+
visibility = ["//:sandbox"],
14+
deps = [
15+
"//pkg/test/dockerutil",
16+
"//pkg/test/testutil",
17+
],
18+
)

0 commit comments

Comments
 (0)