Skip to content

Commit 8d65a08

Browse files
authored
Merge branch 'main' into act-reorder
2 parents d39e7f9 + bd0d80c commit 8d65a08

23 files changed

+370
-350
lines changed

.github/workflows/build-container.yml

+9-1
Original file line numberDiff line numberDiff line change
@@ -53,4 +53,12 @@ jobs:
5353
build-args: |
5454
BRANCH=${{github.head_ref}}
5555
push: true
56-
tags: ghcr.io/neuralmagic/sparseml-dev:${{ inputs.name }}
56+
tags: ghcr.io/neuralmagic/sparseml-dev:${{ inputs.name }}
57+
58+
- name: Build Nightly Docker Container
59+
if: ${{ inputs.dev == 'false' && inputs.release == 'false'}}
60+
uses: docker/build-push-action@v4
61+
with:
62+
context: ./docker/containers/docker_nightly
63+
push: true
64+
tags: ghcr.io/neuralmagic/sparseml-nightly:latest, ghcr.io/neuralmagic/sparseml-nightly:${{ steps.date.outputs.date }}

.github/workflows/build-nightly.yml

-22
This file was deleted.

.github/workflows/build-wheel-and-container.yml

+20-19
Original file line numberDiff line numberDiff line change
@@ -4,15 +4,8 @@ on:
44
types: [opened, synchronize, reopened]
55
branches:
66
- main
7-
- 'release/[0-9]+.[0-9]+'
8-
push:
9-
branches:
10-
- 'release/[0-9]+.[0-9]+'
11-
- main
12-
release:
13-
types: [created, published]
147
schedule:
15-
- cron: '0 0 * * *'
8+
- cron: '0 20 * * *'
169

1710
permissions:
1811
id-token: write
@@ -23,10 +16,10 @@ concurrency:
2316
group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }}
2417
cancel-in-progress: true
2518

26-
# if not dev or release, will create a nightly build
19+
# TODO: do we want to push to nightly every time we push to main?
20+
# if not dev or release, will create a nightly build; turning off release for now
2721
env:
28-
PRODUCTION: ${{ github.event_name == 'schedule' || github.event_name == 'release'}}
29-
RELEASE: ${{ github.event_name =='release' || startsWith(github.base_ref, 'release/') }}
22+
RELEASE: 'false'
3023
DEV: ${{ github.base_ref == 'main' && github.event_name == 'pull_request'}}
3124

3225
jobs:
@@ -42,8 +35,14 @@ jobs:
4235
echo "dev=$DEV" >> $GITHUB_OUTPUT
4336
echo "release=$RELEASE" >> $GITHUB_OUTPUT
4437
45-
build-wheel-and-push:
38+
test-nightly:
4639
needs: set-outputs
40+
if: ${{ needs.set-outputs.outputs.dev == 'false' && needs.set-outputs.outputs.release == 'false'}}
41+
uses: ./.github/workflows/test-nightly.yml
42+
43+
build-wheel-and-push:
44+
needs: [set-outputs, test-nightly]
45+
if: ${{ always() && needs.set-outputs.outputs.dev == 'false' && needs.test-nightly.result == 'success' || always() && needs.set-outputs.outputs.dev == 'true' && needs.set-outputs.result == 'success' }}
4746
uses: ./.github/workflows/build-wheel.yml
4847
with:
4948
build-label: ubuntu-20.04
@@ -55,22 +54,24 @@ jobs:
5554
python: '3.10'
5655
secrets: inherit
5756

58-
test-wheel-and-push-internal:
59-
needs: build-wheel-and-push
60-
uses: ./.github/workflows/test-wheel-push-to-internal.yml
57+
test-wheel-and-publish:
58+
needs: [set-outputs, build-wheel-and-push]
59+
if: ${{ always() && !cancelled() && needs.build-wheel-and-push.result == 'success' }}
60+
uses: ./.github/workflows/test-wheel-and-publish.yml
6161
with:
6262
build-label: ubuntu-20.04
6363
whl: ${{ needs.build-wheel-and-push.outputs.wheel }}
6464
python: '3.10'
65+
dev: ${{ needs.set-outputs.outputs.dev }}
66+
release: ${{ needs.set-outputs.outputs.release }}
6567
secrets: inherit
6668

67-
# TODO: add nightly and release container build steps once wheel build push
68-
# to production is automated. Removed until then.
6969
build-container-and-push:
70-
needs: [set-outputs, test-wheel-and-push-internal]
70+
needs: [test-wheel-and-publish, set-outputs]
71+
if: ${{ always() && !cancelled() && needs.test-wheel-and-publish.result == 'success' }}
7172
uses: ./.github/workflows/build-container.yml
7273
with:
73-
build-label: k8s-eng-gpu-64G-v100-32G
74+
build-label: k8s-eng-gpu-16G-t4-32G
7475
dev: ${{ needs.set-outputs.outputs.dev }}
7576
release: ${{ needs.set-outputs.outputs.release }}
7677
name: ${{ github.event.number }}

.github/workflows/publish-nightly-docker-images.yaml

-79
This file was deleted.

.github/workflows/test-nightly.yml

+1-3
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,7 @@
11
name: Run Nightly Tests
22
on:
3-
schedule:
4-
- cron: '0 20 * * *'
53
workflow_dispatch:
4+
workflow_call:
65
jobs:
76
test-nightly-tests:
87
runs-on: k8s-mle-gpu-12-vcpu-225GB-ram-2-a6000-48G
@@ -33,6 +32,5 @@ jobs:
3332
run: |
3433
pytest tests/sparseml/transformers/obcq -m integration
3534
- name: Run finetune tests
36-
if: always()
3735
run: |
3836
pytest tests/sparseml/transformers/finetune -m integration

.github/workflows/test-wheel-push-to-internal.yml renamed to .github/workflows/test-wheel-and-publish.yml

+29-10
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
name: Test Wheel and Push to Internal PyPi
1+
name: Test Wheel and Publish
22
on:
33
workflow_call:
44
inputs:
@@ -11,9 +11,15 @@ on:
1111
required: true
1212
python:
1313
type: string
14+
dev:
15+
type: string
16+
required: true
17+
release:
18+
type: string
19+
required: true
1420

1521
jobs:
16-
test-wheel-and-push-internal:
22+
test-wheel-and-publish:
1723
runs-on: ${{ inputs.build-label }}
1824
steps:
1925
- uses: actions/setup-python@v4
@@ -36,24 +42,37 @@ jobs:
3642
filename: ${{ inputs.whl }}
3743
dst: dist_s3
3844

39-
- name: Set Env
40-
run: |
41-
pip3 install virtualenv
42-
virtualenv venv
43-
source venv/bin/activate
44-
4545
- name: Fetch name of whl
4646
run: |
4747
echo "FILENAME=$(echo dist_s3/*.whl)" >> $GITHUB_ENV
4848
4949
- name: Install whl
5050
run: |
51-
pip3 install $FILENAME[dev]
51+
pip3 install $FILENAME[dev,onnxruntime,torch,torchvision,transformers]
5252
5353
- name: Checkout code
5454
uses: actions/checkout@v3
5555

5656
- name: Remove src files and run tests
5757
run: |
58+
pwd
5859
rm -rf src
59-
make test
60+
make test
61+
62+
- name: Make directory for wheel
63+
run: |
64+
mkdir dist_s3
65+
66+
- name: Pull from s3
67+
uses: neuralmagic/nm-actions/actions/s3_pull@main
68+
with:
69+
filename: ${{ inputs.whl }}
70+
dst: dist_s3
71+
72+
- name: Publish Nightly Wheel
73+
if: ${{ inputs.DEV == 'false' && inputs.RELEASE == 'false'}}
74+
uses: neuralmagic/nm-actions/actions/publish-whl@main
75+
with:
76+
username: ${{ secrets.PYPI_PUBLIC_USER }}
77+
password: ${{ secrets.PYPI_PUBLIC_AUTH }}
78+
whl: ./$FILENAME
+50
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,50 @@
1+
# Creating a Quantized Llama Model in One Shot
2+
3+
Quantizing a model to a lower precision can reduce memory usage and improve speed at inference time.
4+
This example demonstrates how to use the SparseML API to quantize a Llama model from 16 bits
5+
to 4 bits and save it to a compressed-tensors format for inference with vLLM.
6+
7+
## Step 1: Select a model and dataset
8+
For this example, we will use a TinyLlama model and the Open Platypus dataset; however,
9+
these can be swapped out for any Hugging Face-compatible models and datasets.
10+
11+
```python
12+
model = "TinyLlama/TinyLlama-1.1B-intermediate-step-1431k-3T"
13+
dataset = "open_platypus"
14+
```
15+
16+
## Step 2: Configure a `GPTQModifier`
17+
Modifiers in SparseML are used to apply optimizations to models. In this example we use a
18+
`GPTQModifier` to apply the GPTQ algorithm to our model. We target all `Linear` layers
19+
for 4-bit weight quantization. These options may be swapped out for any valid `QuantizationScheme`.
20+
21+
```python
22+
from sparseml.modifiers.quantization.gptq import GPTQModifier
23+
24+
gptq = GPTQModifier(
25+
targets="Linear",
26+
scheme="W4A16"
27+
)
28+
```
29+
30+
31+
## Step 3: One-Shot Compression
32+
33+
The `oneshot` API applies the created modifier to the target model and dataset.
34+
Setting `save_compressed` to True runs the model through `compressed_tensors` compression
35+
after the quantization is completed.
36+
37+
```python
38+
from sparseml.transformers import oneshot
39+
40+
oneshot(
41+
model=model,
42+
dataset=dataset,
43+
recipe=gptq,
44+
save_compressed=True,
45+
output_dir="llama-compressed-example",
46+
overwrite_output_dir=True,
47+
max_seq_length=256,
48+
num_calibration_samples=256,
49+
)
50+
```

0 commit comments

Comments
 (0)