# Skip to content
#
# fp8 forward
#
# fp8 forward #399
#
# Workflow file for this run

name: AMD Perf Kernel Tests

# Run on manual dispatch, and on pull requests, merge-queue checks, and
# pushes that target the main_perf branch.
on:
  workflow_dispatch:
  pull_request:
    branches: [main_perf]
  merge_group:
    branches: [main_perf]
    types: [checks_requested]
  push:
    branches: [main_perf]

# Allow one active run per workflow and ref; a newer run cancels the one in
# progress. The workflow name is part of the group key so that another
# workflow keyed on the same ref cannot cancel this one.
concurrency:
  group: ${{ github.workflow }}-${{ github.ref }}
  cancel-in-progress: true

# Give the workflow token read-only access to every permission scope.
permissions: read-all
jobs:
  # Installs Triton from source and builds this project inside a rocm/pytorch
  # container, then runs the test and benchmark steps whose `if:` condition
  # matches the current matrix runner.
  Integration-Tests-AMD:
    runs-on: ${{ matrix.runner }}
    strategy:
      matrix:
        runner: [ubuntu-22.04, linux-mi300-gpu-1]
      fail-fast: false  # keep the other matrix entry running if one fails
    # NOTE(review): the image and device options below apply to every matrix
    # entry, including ubuntu-22.04 - confirm that runner exposes /dev/kfd and
    # /dev/dri, otherwise container start-up may fail there.
    container:
      image: rocm/pytorch:rocm6.2.3_ubuntu22.04_py3.10_pytorch_release_2.3.0
      # Folded scalar: the lines below are joined with single spaces into one
      # option string.
      options: >-
        --device=/dev/kfd --device=/dev/dri
        --security-opt seccomp=unconfined
        --group-add video --user root
    env:
      # Shared by the build, test, and bench steps, so it is set once for the
      # whole job instead of being exported in each step.
      FLASH_ATTENTION_TRITON_AMD_ENABLE: "TRUE"
    steps:
      - name: Checkout
        uses: actions/checkout@v4

      # NOTE(review): this runs on every matrix entry; grep exits non-zero
      # when no gfx line is printed - confirm that is intended on ubuntu-22.04.
      - name: Show Device Info
        run: |
          rocminfo | grep gfx

      # Remove any installed triton package, the ~/.triton directory, and a
      # leftover ./triton/python/build directory before building from source.
      - name: Uninstall Triton
        run: |
          pip uninstall -y triton
          rm -rf ~/.triton
          rm -rf ./triton/python/build

      # Clone Triton (default branch, no pinned revision) and install it from
      # the ./python directory of the checkout.
      - name: Install Triton
        run: |
          git clone https://github.com/triton-lang/triton
          cd triton
          pip install ninja cmake wheel pybind11 # build-time dependencies
          pip install --verbose --no-build-isolation ./python
          cd ..

      - name: Show Triton version
        run: |
          pip show triton

      - name: Build
        run: |
          python setup.py install

      # CPU Tests
      - name: Flash Attention Tests Using Reference Impl
        if: matrix.runner == 'ubuntu-22.04'
        env:
          FLASH_ATTENTION_TRITON_AMD_REF: "1"
        run: |
          pytest tests/test_flash_attn_triton_amd.py

      # CDNA Tests
      - name: Flash Attention CDNA Tests
        if: matrix.runner == 'linux-mi300-gpu-1'
        run: |
          pytest tests/test_flash_attn_triton_amd.py

      - name: AMD Tests
        if: matrix.runner == 'linux-mi300-gpu-1'
        run: |
          pytest -v -s flash_attn/flash_attn_triton_amd/test.py

      - name: AMD Bench
        if: matrix.runner == 'linux-mi300-gpu-1'
        run: |
          python flash_attn/flash_attn_triton_amd/bench.py

      - name: AMD Bench with Autotune
        if: matrix.runner == 'linux-mi300-gpu-1'
        env:
          FLASH_ATTENTION_TRITON_AMD_AUTOTUNE: "1"
        run: |
          python flash_attn/flash_attn_triton_amd/bench.py

      # RDNA Tests
      # NOTE(review): the matrix above has no 'gfx1100' entry, so this step is
      # always skipped; add that runner to the matrix to enable it.
      - name: Flash Attention RDNA Tests
        if: matrix.runner == 'gfx1100'
        run: |
          pytest \
            tests/test_flash_attn_triton_amd.py::test_flash_attn_output \
            tests/test_flash_attn_triton_amd.py::test_flash_attn_varlen_output \
            tests/test_flash_attn_triton_amd.py::test_flash_attn_kvcache