Skip to content

Commit 334fe3f

Browse files
committed
Update the S3 path to include the model name
1 parent d67c812 commit 334fe3f

File tree

2 files changed

+53
-26
lines changed

2 files changed

+53
-26
lines changed

.github/scripts/run_vllm_profiling.sh

Lines changed: 43 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -71,6 +71,13 @@ run_profiling_tests() {
7171
exit 1
7272
fi
7373

74+
# Get S3 path components from environment
75+
local s3_date="${S3_UPLOAD_DATE:-}"
76+
local s3_repo="${S3_REPOSITORY:-}"
77+
local s3_sha="${S3_HEAD_SHA:-}"
78+
local s3_run_id="${S3_GITHUB_RUN_ID:-}"
79+
local s3_job="${S3_GITHUB_JOB:-}"
80+
7481
# Iterate over profiling tests
7582
jq -c '.[]' "$profiling_test_file" | while read -r params; do
7683
# Get the test name
@@ -99,36 +106,58 @@ run_profiling_tests() {
99106
kill_gpu_processes
100107

101108
# Create a profiling sub-directory for each test case to isolate the
102-
# generated traces (e.g. using the model name hierarchy)
103-
local sanitized_test_name="${TEST_NAME// /_}"
104-
local test_name_directory="${base_profiler_dir}/${sanitized_test_name}"
105-
mkdir -p "${test_name_directory}"
106-
chmod 755 "${test_name_directory}"
109+
# generated traces, mirroring the S3 key layout: date/repo/model/sha/run_id/job
110+
MODEL_NAME=$(echo "$server_params" | jq -r '.model')
111+
local sanitized_model_name="${MODEL_NAME// /_}"
112+
113+
# Build the directory path following S3 structure
114+
local model_name_directory="${base_profiler_dir}"
115+
116+
# Check if all S3 path components are available
117+
if [[ -n "${s3_date}" && -n "${s3_repo}" && -n "${s3_sha}" && -n "${s3_run_id}" && -n "${s3_job}" ]]; then
118+
# Build the complete S3 path structure
119+
model_name_directory="${model_name_directory}/${s3_date}/${s3_repo}/${sanitized_model_name}/${s3_sha}/${s3_run_id}/${s3_job}"
120+
else
121+
# Error out if any S3 variable is missing
122+
echo "ERROR: Required S3 path variables are missing. Cannot proceed with profiling."
123+
echo "Missing variables:"
124+
[[ -z "${s3_date}" ]] && echo " - S3_UPLOAD_DATE"
125+
[[ -z "${s3_repo}" ]] && echo " - S3_REPOSITORY"
126+
[[ -z "${s3_sha}" ]] && echo " - S3_HEAD_SHA"
127+
[[ -z "${s3_run_id}" ]] && echo " - S3_GITHUB_RUN_ID"
128+
[[ -z "${s3_job}" ]] && echo " - S3_GITHUB_JOB"
129+
echo "Please ensure all required environment variables are set."
130+
exit 1
131+
fi
132+
133+
echo "Creating profiling directory: ${model_name_directory}"
134+
mkdir -p "${model_name_directory}"
135+
chmod 755 "${model_name_directory}"
107136

108137
# Override the profiler output directory for this test only
109-
export VLLM_TORCH_PROFILER_DIR="${test_name_directory}"
138+
export VLLM_TORCH_PROFILER_DIR="${model_name_directory}"
110139

111140
# Run the profiling test
112141
if start_vllm_server "$server_args"; then
113142
run_profiling "$client_args"
114143
cleanup_server
115144

116145
# Debug: Check if profiling files were created
117-
echo "DEBUG: Checking profiling directory: $test_name_directory"
118-
if [ -d "$test_name_directory" ]; then
119-
echo "DEBUG: Profiling directory exists for test $TEST_NAME"
120-
ls -la "$test_name_directory" || echo "DEBUG: Directory is empty or inaccessible"
121-
find "$test_name_directory" -type f 2>/dev/null | head -10 | while read file; do
146+
echo "DEBUG: Checking profiling directory: $model_name_directory"
147+
if [ -d "$model_name_directory" ]; then
148+
echo "DEBUG: Profiling directory exists for model $MODEL_NAME"
149+
ls -la "$model_name_directory" || echo "DEBUG: Directory is empty or inaccessible"
150+
find "$model_name_directory" -type f 2>/dev/null | head -10 | while read file; do
122151
echo "DEBUG: Found profiling file: ${file}"
123152
rename_profiling_file "$file" "vllm"
124153
done
125154
else
126-
echo "DEBUG: Profiling directory does not exist for test $TEST_NAME!"
155+
echo "DEBUG: Profiling directory does not exist for model $MODEL_NAME!"
127156
fi
128157

129-
echo "Profiling test $TEST_NAME completed successfully."
158+
echo "Profiling test $MODEL_NAME completed successfully."
130159
else
131-
echo "Failed to start vLLM server for test $TEST_NAME."
160+
echo "Failed to start vLLM server for test $MODEL_NAME."
132161
continue
133162
fi
134163
done

.github/workflows/vllm-profiling.yml

Lines changed: 10 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -179,6 +179,7 @@ jobs:
179179
VLLM_TORCH_PROFILER_DIR: /tmp/workspace/vllm-profiling/profiling-results
180180
CUDA_VISIBLE_DEVICES: 0
181181
VLLM_USE_V1: 1
182+
REPOSITORY: vllm-project/vllm
182183

183184
run: |
184185
set -eux
@@ -189,6 +190,9 @@ jobs:
189190
ON_CPU=0
190191
fi
191192
193+
# Prepare S3 path components
194+
UPLOAD_DATE=$(date -u +"%Y-%m-%d")
195+
192196
container_name=$(docker run \
193197
${GPU_FLAG:-} \
194198
${SCCACHE_SERVER_PORT_DOCKER_FLAG:-} \
@@ -203,6 +207,11 @@ jobs:
203207
-e VLLM_USE_V1 \
204208
-e DEVICE_NAME \
205209
-e ON_CPU="${ON_CPU}" \
210+
-e S3_UPLOAD_DATE="${UPLOAD_DATE}" \
211+
-e S3_REPOSITORY="${REPOSITORY}" \
212+
-e S3_HEAD_SHA="${HEAD_SHA}" \
213+
-e S3_GITHUB_RUN_ID="${GITHUB_RUN_ID}" \
214+
-e S3_GITHUB_JOB="${GITHUB_JOB}" \
206215
--ipc=host \
207216
--tty \
208217
--detach \
@@ -214,21 +223,10 @@ jobs:
214223
)
215224
docker exec -t "${container_name}" bash -c "cd vllm-profiling && bash ../.github/scripts/run_vllm_profiling.sh"
216225
217-
- name: Prepare S3 upload metadata
218-
id: prepare_s3_upload
219-
env:
220-
REPOSITORY: vllm-project/vllm
221-
run: |
222-
set -eux
223-
224-
UPLOAD_DATE=$(date -u +"%Y-%m-%d")
225-
echo "upload-date=${UPLOAD_DATE}" >> "${GITHUB_OUTPUT}"
226-
echo "s3-prefix=${UPLOAD_DATE}/${REPOSITORY}/${HEAD_SHA}/${GITHUB_RUN_ID}/${GITHUB_JOB}" >> "${GITHUB_OUTPUT}"
227-
228226
- name: Upload profiling results to S3
229227
uses: seemethere/upload-artifact-s3@v5
230228
with:
231-
s3-prefix: ${{ steps.prepare_s3_upload.outputs.s3-prefix }}
229+
s3-prefix: ""
232230
retention-days: 180
233231
path: vllm-profiling/profiling-results
234232
if-no-files-found: warn

0 commit comments

Comments
 (0)