fp8 forward #399

Workflow file for this run

.github/workflows/amd_tests.yml at b715392

	name: AMD Perf Kernel Tests

	# Triggers: manual dispatch, plus pull requests, merge-queue checks and
	# pushes that target the main_perf branch.
	on:
	  workflow_dispatch:
	  pull_request:
	    branches: [main_perf]
	  merge_group:
	    branches: [main_perf]
	    types: [checks_requested]
	  push:
	    branches: [main_perf]

	# Runs are grouped by git ref; a newer run in the same group cancels the
	# one that is still in progress.
	concurrency:
	  group: ${{ github.ref }}
	  cancel-in-progress: true

	# The workflow token gets read-only access to every scope.
	permissions: read-all

	jobs:
	  # Single job fanned out over the runner matrix; every step below runs
	  # inside the ROCm PyTorch container unless its `if:` filters it out.
	  Integration-Tests-AMD:
	    runs-on: ${{ matrix.runner }}
	    strategy:
	      matrix:
	        runner: [ubuntu-22.04, linux-mi300-gpu-1]
	      fail-fast: false  # disables failing the entire job when one matrix entry fails
	    container:
	      image: rocm/pytorch:rocm6.2.3_ubuntu22.04_py3.10_pytorch_release_2.3.0
	      # NOTE(review): these device options are applied to every matrix
	      # entry, including ubuntu-22.04 - confirm that runner exposes
	      # /dev/kfd and /dev/dri.
	      options: --device=/dev/kfd --device=/dev/dri --security-opt seccomp=unconfined --group-add video --user root
	    steps:
	      - name: Checkout
	        uses: actions/checkout@v4
	      # NOTE(review): not gated on matrix.runner, so this also runs on the
	      # ubuntu-22.04 entry; the step fails if grep finds no "gfx" line.
	      - name: Show Device Info
	        run: |
	          rocminfo | grep gfx
	      # Remove any Triton already present in the image, its cache
	      # directory, and a stale build directory before building from source.
	      - name: Uninstall Triton
	        run: |
	          pip uninstall -y triton
	          rm -rf ~/.triton
	          rm -rf ./triton/python/build
	      # Build Triton from the default branch of the upstream repository
	      # (the clone is not pinned to a commit or tag).
	      - name: Install Triton
	        run: |
	          git clone https://github.com/triton-lang/triton
	          cd triton
	          pip install ninja cmake wheel pybind11 # build-time dependencies
	          pip install --verbose --no-build-isolation ./python
	          cd ..
	      - name: Show Triton version
	        run: |
	          pip show triton
	      # Environment variables exported in a `run` script do not carry over
	      # to later steps, so each step below exports what it needs itself.
	      - name: Build
	        run: |
	          export FLASH_ATTENTION_TRITON_AMD_ENABLE="TRUE"
	          python setup.py install

	      # CPU Tests
	      - name: Flash Attention Tests Using Reference Impl
	        if: matrix.runner == 'ubuntu-22.04'
	        run: |
	          export FLASH_ATTENTION_TRITON_AMD_ENABLE="TRUE"
	          export FLASH_ATTENTION_TRITON_AMD_REF=1
	          pytest tests/test_flash_attn_triton_amd.py

	      # CDNA Tests
	      - name: Flash Attention CDNA Tests
	        if: matrix.runner == 'linux-mi300-gpu-1'
	        run: |
	          export FLASH_ATTENTION_TRITON_AMD_ENABLE="TRUE"
	          pytest tests/test_flash_attn_triton_amd.py
	      - name: AMD Tests
	        if: matrix.runner == 'linux-mi300-gpu-1'
	        run: |
	          export FLASH_ATTENTION_TRITON_AMD_ENABLE="TRUE"
	          pytest -v -s flash_attn/flash_attn_triton_amd/test.py
	      - name: AMD Bench
	        if: matrix.runner == 'linux-mi300-gpu-1'
	        run: |
	          export FLASH_ATTENTION_TRITON_AMD_ENABLE="TRUE"
	          python flash_attn/flash_attn_triton_amd/bench.py
	      - name: AMD Bench with Autotune
	        if: matrix.runner == 'linux-mi300-gpu-1'
	        run: |
	          export FLASH_ATTENTION_TRITON_AMD_ENABLE="TRUE"
	          export FLASH_ATTENTION_TRITON_AMD_AUTOTUNE=1
	          python flash_attn/flash_attn_triton_amd/bench.py

	      # RDNA Tests
	      # NOTE(review): 'gfx1100' is not listed in strategy.matrix.runner
	      # above, so this condition is never true and the step is always
	      # skipped - confirm whether the runner should be added to the matrix.
	      - name: Flash Attention RDNA Tests
	        if: matrix.runner == 'gfx1100'
	        run: |
	          export FLASH_ATTENTION_TRITON_AMD_ENABLE="TRUE"
	          pytest tests/test_flash_attn_triton_amd.py::test_flash_attn_output tests/test_flash_attn_triton_amd.py::test_flash_attn_varlen_output tests/test_flash_attn_triton_amd.py::test_flash_attn_kvcache

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

fp8 forward #399

Workflow file

fp8 forward #399

Jobs

Run details

Workflow file for this run