address review comments

namanlalitnyu · namanlalitnyu · commit 35e9f393efda · 2025-09-12T13:52:13.000-07:00
diff --git a/.github/workflows/sglang-benchmark.yml b/.github/workflows/sglang-benchmark.yml
@@ -186,7 +186,7 @@ jobs:
           # Verify installations
           echo "$(pwd)/sgl_server_env/bin" >> $GITHUB_PATH
 
-      - name: Install NVCC
+      - name: Install NVCC  # TODO: Use the nvidia/cuda:12.8.1-devel-ubuntu22.04 Docker image instead of specifying the variables locally
         if: env.DEVICE_NAME == 'cuda'
         shell: bash
         run: |
diff --git a/.github/workflows/vllm-profiling.yml b/.github/workflows/vllm-profiling.yml
@@ -1,3 +1,4 @@
+# TODO: Refactor the workflows to extract their common parts into a reusable GitHub Actions workflow or composite action
 name: vLLM Profiling
 
 on:
@@ -14,6 +15,7 @@ on:
         description: vLLM commit (optional, default to the latest commit in the branch that has not yet been benchmarked)
         required: false
         type: string
+      # TODO: Add support for profiling a specific model on a specific runner
   pull_request:
     paths:
       - .github/workflows/vllm-profiling.yml
@@ -39,7 +41,7 @@ jobs:
       fail-fast: false
       matrix:
         include:
-          - runs-on: linux.aws.h100.4
+          - runs-on: linux.aws.a100
             device-name: cuda
     runs-on: ${{ matrix.runs-on }}
     environment: pytorch-x-vllm