diff --git a/Makefile b/Makefile index 34fd13bd07..8945091643 100644 --- a/Makefile +++ b/Makefile @@ -107,7 +107,7 @@ slow_tests_diffusers: test_installs # Run text-generation non-regression tests slow_tests_text_generation_example: test_installs - python -m pip install triton==3.1.0 autoawq + python -m pip install -r examples/text-generation/requirements_awq.txt BUILD_CUDA_EXT=0 python -m pip install -vvv --no-build-isolation git+https://github.com/HabanaAI/AutoGPTQ.git python -m pip install git+https://github.com/HabanaAI/DeepSpeed.git@1.19.0 python -m pytest tests/test_text_generation_example.py tests/test_encoder_decoder.py -v -s --token $(TOKEN) diff --git a/examples/text-generation/README.md b/examples/text-generation/README.md index 5d399f65dd..9d51bffc8f 100755 --- a/examples/text-generation/README.md +++ b/examples/text-generation/README.md @@ -735,7 +735,7 @@ Currently, this support is limited to UINT4 inference of pre-quantized models on Please run the following command to install AutoAWQ: ```bash -pip install triton==3.1.0 autoawq +pip install -r requirements_awq.txt ``` You can run a *UINT4 weight quantized* model using AutoAWQ by including the argument `--load_quantized_model_with_autoawq`. diff --git a/examples/text-generation/requirements_awq.txt b/examples/text-generation/requirements_awq.txt new file mode 100644 index 0000000000..5632195c99 --- /dev/null +++ b/examples/text-generation/requirements_awq.txt @@ -0,0 +1,2 @@ +triton==3.1.0 +autoawq