# From PR "fp8 forward" (#399). Pasted from the GitHub diff viewer; UI banner text removed.
# CI workflow: builds Triton from source inside a ROCm container and runs the
# Triton-backed flash-attention test/bench suites on a CPU runner (reference
# impl) and an MI300 GPU runner (CDNA); an RDNA (gfx1100) step exists but no
# such runner is currently in the matrix.
name: AMD Perf Kernel Tests

on:
  workflow_dispatch:
  pull_request:
    branches: [main_perf]
  merge_group:
    branches: [main_perf]
    types: [checks_requested]
  push:
    branches: [main_perf]

# Cancel any in-flight run for the same ref when a new one starts.
concurrency:
  group: ${{ github.ref }}
  cancel-in-progress: true

permissions: read-all

jobs:
  Integration-Tests-AMD:
    runs-on: ${{ matrix.runner }}
    strategy:
      matrix:
        runner: [ubuntu-22.04, linux-mi300-gpu-1]
      fail-fast: false  # disables failing the entire job when one matrix entry fails
    container:
      image: rocm/pytorch:rocm6.2.3_ubuntu22.04_py3.10_pytorch_release_2.3.0
      # GPU device passthrough plus root user so pip/setup.py can install system-wide.
      options: --device=/dev/kfd --device=/dev/dri --security-opt seccomp=unconfined --group-add video --user root
    steps:
      - name: Checkout
        uses: actions/checkout@v4
      - name: Show Device Info
        run: |
          rocminfo | grep gfx
      - name: Uninstall Triton
        run: |
          pip uninstall -y triton
          rm -rf ~/.triton
          rm -rf ./triton/python/build
      - name: Install Triton
        run: |
          git clone https://github.com/triton-lang/triton
          cd triton
          pip install ninja cmake wheel pybind11 # build-time dependencies
          pip install --verbose --no-build-isolation ./python
          cd ..
      - name: Show Triton version
        run: |
          pip show triton
      - name: Build
        run: |
          export FLASH_ATTENTION_TRITON_AMD_ENABLE="TRUE"
          python setup.py install
      # CPU Tests
      - name: Flash Attention Tests Using Reference Impl
        if: matrix.runner == 'ubuntu-22.04'
        run: |
          export FLASH_ATTENTION_TRITON_AMD_ENABLE="TRUE"
          export FLASH_ATTENTION_TRITON_AMD_REF=1
          pytest tests/test_flash_attn_triton_amd.py
      # CDNA Tests
      - name: Flash Attention CDNA Tests
        if: matrix.runner == 'linux-mi300-gpu-1'
        run: |
          export FLASH_ATTENTION_TRITON_AMD_ENABLE="TRUE"
          pytest tests/test_flash_attn_triton_amd.py
      - name: AMD Tests
        if: matrix.runner == 'linux-mi300-gpu-1'
        run: |
          export FLASH_ATTENTION_TRITON_AMD_ENABLE="TRUE"
          pytest -v -s flash_attn/flash_attn_triton_amd/test.py
      - name: AMD Bench
        if: matrix.runner == 'linux-mi300-gpu-1'
        run: |
          export FLASH_ATTENTION_TRITON_AMD_ENABLE="TRUE"
          python flash_attn/flash_attn_triton_amd/bench.py
      - name: AMD Bench with Autotune
        if: matrix.runner == 'linux-mi300-gpu-1'
        run: |
          export FLASH_ATTENTION_TRITON_AMD_ENABLE="TRUE"
          export FLASH_ATTENTION_TRITON_AMD_AUTOTUNE=1
          python flash_attn/flash_attn_triton_amd/bench.py
      # RDNA Tests
      # NOTE(review): no 'gfx1100' entry exists in the runner matrix above, so
      # this step is currently always skipped — confirm whether a gfx1100
      # runner should be added to matrix.runner.
      - name: Flash Attention RDNA Tests
        if: matrix.runner == 'gfx1100'
        run: |
          export FLASH_ATTENTION_TRITON_AMD_ENABLE="TRUE"
          pytest tests/test_flash_attn_triton_amd.py::test_flash_attn_output tests/test_flash_attn_triton_amd.py::test_flash_attn_varlen_output tests/test_flash_attn_triton_amd.py::test_flash_attn_kvcache