vLLM Profiling #35
# TODO: Refactor the workflows to extract the common parts into a GHA reusable module
name: vLLM Profiling

on:
  schedule:
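    # Weekly run: 00:00 UTC every Sunday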
    - cron: '0 0 * * 0'
  workflow_dispatch:
    inputs:
      vllm_branch:
        description: vLLM branch (main, releases/vERSION for release validation, or refs/pull/PR_NUMBER/head for a pre-merge check on a pull request)
        required: true
        type: string
        default: main
      vllm_commit:
        description: vLLM commit (optional, defaults to the latest commit on the branch that has not yet been benchmarked)
        required: false
        type: string
      # TODO: add support for profiling on a specific model and runner
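
  # A manual run can be dispatched with the GitHub CLI, e.g. (hypothetical values):
  #   gh workflow run vllm-profiling.yml -f vllm_branch=main -f vllm_commit=<sha>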
  pull_request:
    paths:
      - .github/workflows/vllm-profiling.yml
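
# New pushes cancel in-progress runs for the same PR (or commit); manual and
# scheduled runs are keyed separately so they never cancel each other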
concurrency:
  group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.sha }}-${{ github.event_name == 'workflow_dispatch' }}-${{ github.event_name == 'schedule' }}
  cancel-in-progress: true

jobs:
  set-parameters:
    runs-on: ubuntu-latest
    steps:
      - name: Checkout repository
        uses: actions/checkout@v4

      - uses: actions/setup-python@v5
        with:
          python-version: '3.12'

  profiling:
    name: Run vLLM profiling
    needs: set-parameters
    strategy:
      fail-fast: false
      matrix:
        include:
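          # A single CUDA A100 runner for now; the device is re-detected at runtime below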
          - runs-on: linux.aws.a100
            device-name: cuda
    runs-on: ${{ matrix.runs-on }}
    environment: pytorch-x-vllm
    permissions:
      id-token: write
      contents: read
    steps:
      - name: Checkout repository
        uses: actions/checkout@v4

      - name: Checkout vLLM repository
        uses: actions/checkout@v4
        with:
          repository: vllm-project/vllm
          path: vllm-profiling/vllm
          ref: ${{ inputs.vllm_branch || 'main' }}
          fetch-depth: 0

      - uses: actions/setup-python@v5
        continue-on-error: true
        with:
          python-version: '3.12'
          cache: 'pip'
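
      # Detect the accelerator by probing for the vendor CLI rather than trusting the
      # matrix value, so the same steps work on CUDA, ROCm, and CPU runners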
      - name: Check if the device is supported
        shell: bash
        run: |
          set -eux

          if command -v nvidia-smi; then
            DEVICE_NAME=cuda
            nvidia-smi
          elif command -v rocm-smi; then
            DEVICE_NAME=rocm
            rocm-smi
          else
            DEVICE_NAME=cpu
            lscpu
          fi
          echo "DEVICE_NAME=$DEVICE_NAME" >> $GITHUB_ENV
      - name: Set GPU name and type
        shell: bash
        run: |
          set -eux

          if [[ "${DEVICE_NAME}" == "cuda" ]]; then
            DEVICE_TYPE=$(nvidia-smi -i 0 --query-gpu=name --format=csv,noheader | awk '{print $2}')
          elif [[ "${DEVICE_NAME}" == "rocm" ]]; then
            DEVICE_TYPE=$(rocminfo | grep "Marketing Name" | tail -n1 | awk -F':' '{print $2}' | xargs)
          elif [[ "${DEVICE_NAME}" == "cpu" ]]; then
            DEVICE_TYPE=$(lscpu | grep 'Model name' | cut -f 2 -d ":" | awk '{$1=$1}1' | cut -f 2 -d " ")
          fi
          echo "DEVICE_TYPE=$DEVICE_TYPE" >> $GITHUB_ENV

      - name: Install dependencies
        shell: bash
        run: |
          set -eux

          if [[ "${DEVICE_NAME}" == "rocm" ]]; then
            pip install -r .github/scripts/requirements.txt \
              --extra-index-url https://download.pytorch.org/whl/rocm6.3
          else
            pip install -r .github/scripts/requirements.txt \
              --extra-index-url https://download.pytorch.org/whl/cu128
          fi
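
      # Post-merge commits on main are published to the postmerge ECR repo; everything
      # else (release branches, PR refs) comes from the CI test repo. ROCm images live
      # on Docker Hub, and CPU images share the CUDA repos with a -cpu tag suffix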
      - name: Set Docker registry
        shell: bash
        env:
          HEAD_BRANCH: ${{ inputs.vllm_branch || 'main' }}
        run: |
          set -eux

          if [[ "${HEAD_BRANCH}" == "main" ]]; then
            DOCKER_IMAGE_PREFIX=public.ecr.aws/q9t5s3a7/vllm-ci-postmerge-repo
          else
            DOCKER_IMAGE_PREFIX=public.ecr.aws/q9t5s3a7/vllm-ci-test-repo
          fi

          DOCKER_IMAGE_SUFFIX=""
          if [[ "${DEVICE_NAME}" == "rocm" ]]; then
            DOCKER_IMAGE_PREFIX=docker.io/rocm/vllm-ci
          elif [[ "${DEVICE_NAME}" == "cpu" ]]; then
            DOCKER_IMAGE_SUFFIX=-cpu
          fi

          echo "DOCKER_IMAGE_PREFIX=$DOCKER_IMAGE_PREFIX" >> $GITHUB_ENV
          echo "DOCKER_IMAGE_SUFFIX=$DOCKER_IMAGE_SUFFIX" >> $GITHUB_ENV
      - name: Check for last commit
        working-directory: vllm-profiling/vllm
        env:
          HEAD_BRANCH: ${{ inputs.vllm_branch || 'main' }}
          HEAD_SHA: ${{ inputs.vllm_commit || '' }}
        run: |
          set -eux

          if [[ -z "${HEAD_SHA}" ]]; then
            for i in {0..99}
            do
              HEAD_SHA=$(git rev-parse --verify HEAD~${i})
              DOCKER_IMAGE="${DOCKER_IMAGE_PREFIX}:${HEAD_SHA}${DOCKER_IMAGE_SUFFIX}"

              # If a Docker image is available for this commit, stop searching
              if docker manifest inspect "${DOCKER_IMAGE}"; then
                break
              fi
            done
          fi

          echo "HEAD_SHA=$HEAD_SHA" >> $GITHUB_ENV
          echo "### Run profiling on [${HEAD_SHA}](https://github.com/vllm-project/vllm/commit/${HEAD_SHA})" >> "${GITHUB_STEP_SUMMARY}"

      - name: Setup CUDA GPU_FLAG for docker run
        if: env.DEVICE_NAME == 'cuda'
        run: |
          echo "GPU_FLAG=--gpus all -e NVIDIA_DRIVER_CAPABILITIES=all" >> "${GITHUB_ENV}"

      - name: Setup ROCm
        if: env.DEVICE_NAME == 'rocm'
        uses: pytorch/pytorch/./.github/actions/setup-rocm@main
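
      # sccache listens on port 4226 by default; offset it by the runner UID so
      # containers from concurrent jobs on a shared host don't collide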
      - name: Setup SCCACHE_SERVER_PORT environment for docker run in a container
        run: |
          echo "SCCACHE_SERVER_PORT_DOCKER_FLAG=-e SCCACHE_SERVER_PORT=$((RUNNER_UID + 4226))" >> "${GITHUB_ENV}"
      - name: Run vLLM profiling
        env:
          SCCACHE_BUCKET: ossci-compiler-cache-circleci-v2
          SCCACHE_REGION: us-east-1
          HF_TOKEN: ${{ secrets.HF_TOKEN }}
          DOCKER_IMAGE: ${{ env.DOCKER_IMAGE_PREFIX }}:${{ env.HEAD_SHA }}${{ env.DOCKER_IMAGE_SUFFIX }}
          VLLM_USE_MODELSCOPE: false
          VLLM_TORCH_PROFILER_DIR: /tmp/workspace/vllm-profiling/profiling-results
          CUDA_VISIBLE_DEVICES: 0
          VLLM_USE_V1: 1
        run: |
          set -eux

          if [[ "${DEVICE_NAME}" == "cpu" ]]; then
            ON_CPU=1
          else
            ON_CPU=0
          fi

          container_name=$(docker run \
            ${GPU_FLAG:-} \
            ${SCCACHE_SERVER_PORT_DOCKER_FLAG:-} \
            -e SCCACHE_BUCKET \
            -e SCCACHE_REGION \
            -e DEVICE_NAME \
            -e DEVICE_TYPE \
            -e HF_TOKEN \
            -e VLLM_USE_MODELSCOPE \
            -e VLLM_TORCH_PROFILER_DIR \
            -e CUDA_VISIBLE_DEVICES \
            -e VLLM_USE_V1 \
            -e ON_CPU="${ON_CPU}" \
            -e S3_HEAD_SHA="${HEAD_SHA}" \
            -e S3_GITHUB_RUN_ID="${GITHUB_RUN_ID}" \
            -e S3_GITHUB_JOB="${GITHUB_JOB}" \
            --ipc=host \
            --tty \
            --detach \
            --security-opt seccomp=unconfined \
            --shm-size=4g \
            -v "${GITHUB_WORKSPACE}:/tmp/workspace" \
            -w /tmp/workspace \
            "${DOCKER_IMAGE}"
          )
          docker exec -t "${container_name}" bash -c "cd vllm-profiling && bash ../.github/scripts/run_vllm_profiling.sh"
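
      # Results are keyed by upload date and repository so the S3 listing groups runs by day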
      - name: Prepare S3 upload metadata
        id: prepare_s3_upload
        env:
          REPOSITORY: vllm-project/vllm
        run: |
          set -eux

          UPLOAD_DATE=$(date -u +"%Y-%m-%d")
          echo "upload-date=${UPLOAD_DATE}" >> "${GITHUB_OUTPUT}"
          echo "s3-prefix=${UPLOAD_DATE}/${REPOSITORY}" >> "${GITHUB_OUTPUT}"
      - name: Upload profiling results to S3
        uses: seemethere/upload-artifact-s3@v5
        with:
          s3-prefix: ${{ steps.prepare_s3_upload.outputs.s3-prefix }}
          retention-days: 180
          path: vllm-profiling/profiling-results
          if-no-files-found: warn

      - uses: actions/upload-artifact@v4
        with:
          name: profiling-results--${{ env.DEVICE_TYPE }}
          path: vllm-profiling/profiling-results
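
# The uploaded results are torch profiler traces (see VLLM_TORCH_PROFILER_DIR above).
# A downloaded artifact can be inspected locally, e.g. (hypothetical file names):
#   unzip profiling-results--A100-SXM4-40GB.zip -d profiling-results
#   ls profiling-results  # trace files load in https://ui.perfetto.dev or chrome://tracing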