vLLM Profiling #35
# TODO: Refactor the workflows to extract the common parts into a GHA reusable module
name: vLLM Profiling

on:
  schedule:
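    # Weekly run: 00:00 UTC every Sunday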
    - cron: '0 0 * * 0'
  workflow_dispatch:
    inputs:
      vllm_branch:
        description: vLLM branch (main, releases/vERSION for release validation, or refs/pull/PR_NUMBER/head for a pre-merge check on a pull request)
        required: true
        type: string
        default: main
      vllm_commit:
        description: vLLM commit (optional, defaults to the latest commit on the branch that has not yet been benchmarked)
        required: false
        type: string
      # TODO: add support for profiling on a specific model and runner
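
  # A manual run can be dispatched with the GitHub CLI, e.g. (hypothetical values):
  #   gh workflow run vllm-profiling.yml -f vllm_branch=main -f vllm_commit=<sha>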
  pull_request:
    paths:
      - .github/workflows/vllm-profiling.yml
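
# New pushes cancel in-progress runs for the same PR (or commit); manual and
# scheduled runs are keyed separately so they never cancel each other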
concurrency:
  group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.sha }}-${{ github.event_name == 'workflow_dispatch' }}-${{ github.event_name == 'schedule' }}
  cancel-in-progress: true

jobs:
  set-parameters:
    runs-on: ubuntu-latest
    steps:
      - name: Checkout repository
        uses: actions/checkout@v4

      - uses: actions/setup-python@v5
        with:
          python-version: '3.12'

  profiling:
    name: Run vLLM profiling
    needs: set-parameters
    strategy:
      fail-fast: false
      matrix:
        include:
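          # A single CUDA A100 runner for now; the device is re-detected at runtime below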
          - runs-on: linux.aws.a100
            device-name: cuda
    runs-on: ${{ matrix.runs-on }}
    environment: pytorch-x-vllm
    permissions:
      id-token: write
      contents: read
    steps:
      - name: Checkout repository
        uses: actions/checkout@v4

      - name: Checkout vLLM repository
        uses: actions/checkout@v4
        with:
          repository: vllm-project/vllm
          path: vllm-profiling/vllm
          ref: ${{ inputs.vllm_branch || 'main' }}
          fetch-depth: 0

      - uses: actions/setup-python@v5
        continue-on-error: true
        with:
          python-version: '3.12'
          cache: 'pip'
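
      # Detect the accelerator by probing for the vendor CLI rather than trusting the
      # matrix value, so the same steps work on CUDA, ROCm, and CPU runners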
      - name: Check if the device is supported
        shell: bash
        run: |
          set -eux

          if command -v nvidia-smi; then
            DEVICE_NAME=cuda
            nvidia-smi
          elif command -v rocm-smi; then
            DEVICE_NAME=rocm
            rocm-smi
          else
            DEVICE_NAME=cpu
            lscpu
          fi
          echo "DEVICE_NAME=$DEVICE_NAME" >> $GITHUB_ENV
      - name: Set GPU name and type
        shell: bash
        run: |
          set -eux

          if [[ "${DEVICE_NAME}" == "cuda" ]]; then
            DEVICE_TYPE=$(nvidia-smi -i 0 --query-gpu=name --format=csv,noheader | awk '{print $2}')
          elif [[ "${DEVICE_NAME}" == "rocm" ]]; then
            DEVICE_TYPE=$(rocminfo | grep "Marketing Name" | tail -n1 | awk -F':' '{print $2}' | xargs)
          elif [[ "${DEVICE_NAME}" == "cpu" ]]; then
            DEVICE_TYPE=$(lscpu | grep 'Model name' | cut -f 2 -d ":" | awk '{$1=$1}1' | cut -f 2 -d " ")
          fi
          echo "DEVICE_TYPE=$DEVICE_TYPE" >> $GITHUB_ENV

      - name: Install dependencies
        shell: bash
        run: |
          set -eux

          if [[ "${DEVICE_NAME}" == "rocm" ]]; then
            pip install -r .github/scripts/requirements.txt \
              --extra-index-url https://download.pytorch.org/whl/rocm6.3
          else
            pip install -r .github/scripts/requirements.txt \
              --extra-index-url https://download.pytorch.org/whl/cu128
          fi
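
      # Post-merge commits on main are published to the postmerge ECR repo; everything
      # else (release branches, PR refs) comes from the CI test repo. ROCm images live
      # on Docker Hub, and CPU images share the CUDA repos with a -cpu tag suffix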
      - name: Set Docker registry
        shell: bash
        env:
          HEAD_BRANCH: ${{ inputs.vllm_branch || 'main' }}
        run: |
          set -eux

          if [[ "${HEAD_BRANCH}" == "main" ]]; then
            DOCKER_IMAGE_PREFIX=public.ecr.aws/q9t5s3a7/vllm-ci-postmerge-repo
          else
            DOCKER_IMAGE_PREFIX=public.ecr.aws/q9t5s3a7/vllm-ci-test-repo
          fi

          DOCKER_IMAGE_SUFFIX=""
          if [[ "${DEVICE_NAME}" == "rocm" ]]; then
            DOCKER_IMAGE_PREFIX=docker.io/rocm/vllm-ci
          elif [[ "${DEVICE_NAME}" == "cpu" ]]; then
            DOCKER_IMAGE_SUFFIX=-cpu
          fi

          echo "DOCKER_IMAGE_PREFIX=$DOCKER_IMAGE_PREFIX" >> $GITHUB_ENV
          echo "DOCKER_IMAGE_SUFFIX=$DOCKER_IMAGE_SUFFIX" >> $GITHUB_ENV
      - name: Check for last commit
        working-directory: vllm-profiling/vllm
        env:
          HEAD_BRANCH: ${{ inputs.vllm_branch || 'main' }}
          HEAD_SHA: ${{ inputs.vllm_commit || '' }}
        run: |
          set -eux

          if [[ -z "${HEAD_SHA}" ]]; then
            for i in {0..99}
            do
              HEAD_SHA=$(git rev-parse --verify HEAD~${i})
              DOCKER_IMAGE="${DOCKER_IMAGE_PREFIX}:${HEAD_SHA}${DOCKER_IMAGE_SUFFIX}"

              # If a Docker image is available for this commit, stop searching
              if docker manifest inspect "${DOCKER_IMAGE}"; then
                break
              fi
            done
          fi

          echo "HEAD_SHA=$HEAD_SHA" >> $GITHUB_ENV
          echo "### Run profiling on [${HEAD_SHA}](https://github.com/vllm-project/vllm/commit/${HEAD_SHA})" >> "${GITHUB_STEP_SUMMARY}"

      - name: Setup CUDA GPU_FLAG for docker run
        if: env.DEVICE_NAME == 'cuda'
        run: |
          echo "GPU_FLAG=--gpus all -e NVIDIA_DRIVER_CAPABILITIES=all" >> "${GITHUB_ENV}"

      - name: Setup ROCm
        if: env.DEVICE_NAME == 'rocm'
        uses: pytorch/pytorch/./.github/actions/setup-rocm@main
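
      # sccache listens on port 4226 by default; offset it by the runner UID so
      # containers from concurrent jobs on a shared host don't collide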
      - name: Setup SCCACHE_SERVER_PORT environment for docker run in a container
        run: |
          echo "SCCACHE_SERVER_PORT_DOCKER_FLAG=-e SCCACHE_SERVER_PORT=$((RUNNER_UID + 4226))" >> "${GITHUB_ENV}"
      - name: Run vLLM profiling
        env:
          SCCACHE_BUCKET: ossci-compiler-cache-circleci-v2
          SCCACHE_REGION: us-east-1
          HF_TOKEN: ${{ secrets.HF_TOKEN }}
          DOCKER_IMAGE: ${{ env.DOCKER_IMAGE_PREFIX }}:${{ env.HEAD_SHA }}${{ env.DOCKER_IMAGE_SUFFIX }}
          VLLM_USE_MODELSCOPE: false
          VLLM_TORCH_PROFILER_DIR: /tmp/workspace/vllm-profiling/profiling-results
          CUDA_VISIBLE_DEVICES: 0
          VLLM_USE_V1: 1
        run: |
          set -eux

          if [[ "${DEVICE_NAME}" == "cpu" ]]; then
            ON_CPU=1
          else
            ON_CPU=0
          fi

          container_name=$(docker run \
            ${GPU_FLAG:-} \
            ${SCCACHE_SERVER_PORT_DOCKER_FLAG:-} \
            -e SCCACHE_BUCKET \
            -e SCCACHE_REGION \
            -e DEVICE_NAME \
            -e DEVICE_TYPE \
            -e HF_TOKEN \
            -e VLLM_USE_MODELSCOPE \
            -e VLLM_TORCH_PROFILER_DIR \
            -e CUDA_VISIBLE_DEVICES \
            -e VLLM_USE_V1 \
            -e ON_CPU="${ON_CPU}" \
            -e S3_HEAD_SHA="${HEAD_SHA}" \
            -e S3_GITHUB_RUN_ID="${GITHUB_RUN_ID}" \
            -e S3_GITHUB_JOB="${GITHUB_JOB}" \
            --ipc=host \
            --tty \
            --detach \
            --security-opt seccomp=unconfined \
            --shm-size=4g \
            -v "${GITHUB_WORKSPACE}:/tmp/workspace" \
            -w /tmp/workspace \
            "${DOCKER_IMAGE}"
          )
          docker exec -t "${container_name}" bash -c "cd vllm-profiling && bash ../.github/scripts/run_vllm_profiling.sh"
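
      # Results are keyed by upload date and repository so the S3 listing groups runs by day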
      - name: Prepare S3 upload metadata
        id: prepare_s3_upload
        env:
          REPOSITORY: vllm-project/vllm
        run: |
          set -eux

          UPLOAD_DATE=$(date -u +"%Y-%m-%d")
          echo "upload-date=${UPLOAD_DATE}" >> "${GITHUB_OUTPUT}"
          echo "s3-prefix=${UPLOAD_DATE}/${REPOSITORY}" >> "${GITHUB_OUTPUT}"
      - name: Upload profiling results to S3
        uses: seemethere/upload-artifact-s3@v5
        with:
          s3-prefix: ${{ steps.prepare_s3_upload.outputs.s3-prefix }}
          retention-days: 180
          path: vllm-profiling/profiling-results
          if-no-files-found: warn

      - uses: actions/upload-artifact@v4
        with:
          name: profiling-results--${{ env.DEVICE_TYPE }}
          path: vllm-profiling/profiling-results
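
# The uploaded results are torch profiler traces (see VLLM_TORCH_PROFILER_DIR above).
# A downloaded artifact can be inspected locally, e.g. (hypothetical file names):
#   unzip profiling-results--A100-SXM4-40GB.zip -d profiling-results
#   ls profiling-results  # trace files load in https://ui.perfetto.dev or chrome://tracing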