diff --git a/.github/ISSUE_TEMPLATE/bug_report.md b/.github/ISSUE_TEMPLATE/bug_report.md index dd9376a1..aa965688 100644 --- a/.github/ISSUE_TEMPLATE/bug_report.md +++ b/.github/ISSUE_TEMPLATE/bug_report.md @@ -14,7 +14,7 @@ A clear and concise description of what you expected to happen. **Environment** Include all relevant environment information: 1. OS [e.g. Ubuntu 18.04]: -2. Python version [e.g. 3.7]: +2. Python version [e.g. 3.8]: 3. Sparsify version or commit hash [e.g. 0.1.0, `f7245c8`]: 4. ML framework version(s) [e.g. torch 1.7.1]: 5. Other Python package versions [e.g. SparseZoo, DeepSparse, numpy, ONNX]: diff --git a/.github/workflows/build-docker-image.yaml b/.github/workflows/build-docker-image.yaml new file mode 100644 index 00000000..baf3992c --- /dev/null +++ b/.github/workflows/build-docker-image.yaml @@ -0,0 +1,72 @@ +name: Build and Publish Sparsify Release Docker Images + +on: + release: + types: [published] + +jobs: + build-and-push-docker-image: + name: Build and Push Version Tagged Docker Images to GitHub Container Registry + runs-on: ubuntu-latest + permissions: + contents: read + packages: write + + steps: + - name: Cleanup disk + run: | + sudo ls -l /usr/local/lib/ + sudo ls -l /usr/share/ + sudo du -sh /usr/local/lib/ + sudo du -sh /usr/share/ + sudo rm -rf /usr/local/lib/android + sudo rm -rf /usr/share/dotnet + sudo du -sh /usr/local/lib/ + sudo du -sh /usr/share/ + + - name: Set up Docker Buildx + if: ${{ startsWith(github.ref, 'refs/tags/v') }} + id: buildx + uses: docker/setup-buildx-action@v2 + with: + buildkitd-flags: --debug + + - name: Login to Github Packages + if: ${{ startsWith(github.ref, 'refs/tags/v') }} + uses: docker/login-action@v2 + with: + registry: ghcr.io + username: ${{ github.actor }} + password: ${{ secrets.GITHUB_TOKEN }} + + - name: Checkout code + if: ${{ startsWith(github.ref, 'refs/tags/v') }} + uses: actions/checkout@v3 + with: + fetch-depth: 1 + + - name: Get Tag + id: extract_tag + run: echo "##[set-output name=tag;]$(echo ${GITHUB_REF_NAME#*/})" + + - name: Current Version Name + if: ${{ startsWith(github.ref, 'refs/tags/v') }} + run: | + echo ${{ steps.extract_tag.outputs.tag }} + + - name: Build and push sparsify release ${{ steps.extract_tag.outputs.tag }} docker image + if: ${{ startsWith(github.ref, 'refs/tags/v') }} + uses: docker/build-push-action@v2 + with: + context: ./docker + build-args: | + REF=release/${{ steps.extract_tag.outputs.tag }} + push: true + tags: | + ghcr.io/neuralmagic/sparsify:${{ steps.extract_tag.outputs.tag }} + + + + - name: Image digest + if: ${{ startsWith(github.ref, 'refs/tags/v') }} + run: echo ${{ steps.docker_build.outputs.digest }} diff --git a/.github/workflows/build-nightly-docker-image.yaml b/.github/workflows/build-nightly-docker-image.yaml new file mode 100644 index 00000000..aa28d3f0 --- /dev/null +++ b/.github/workflows/build-nightly-docker-image.yaml @@ -0,0 +1,58 @@ +name: Build and Publish Sparsify Release Docker Images + +on: + push: + branches: + - 'main' + +jobs: + build-and-push-docker-image: + name: Build and Push Version Tagged Docker Images to GitHub Container Registry + runs-on: ubuntu-latest + permissions: + contents: read + packages: write + + steps: + - name: Cleanup disk + run: | + sudo ls -l /usr/local/lib/ + sudo ls -l /usr/share/ + sudo du -sh /usr/local/lib/ + sudo du -sh /usr/share/ + sudo rm -rf /usr/local/lib/android + sudo rm -rf /usr/share/dotnet + sudo du -sh /usr/local/lib/ + sudo du -sh /usr/share/ + + - name: Set up Docker Buildx + id: buildx + uses: 
docker/setup-buildx-action@v2 + with: + buildkitd-flags: --debug + + - name: Checkout code + uses: actions/checkout@v3 + with: + fetch-depth: 1 + + - name: Login to Github Packages + uses: docker/login-action@v2 + with: + registry: ghcr.io + username: ${{ github.actor }} + password: ${{ secrets.GITHUB_TOKEN }} + + + - name: Build and push sparsify-nightly docker image + uses: docker/build-push-action@v2 + with: + context: ./docker + build-args: | + REF=main + push: true + tags: | + ghcr.io/neuralmagic/sparsify-nightly:latest + + - name: Image digest + run: echo ${{ steps.docker_build.outputs.digest }} diff --git a/DEVELOPING.md b/DEVELOPING.md index 25d27350..0fd7d95f 100644 --- a/DEVELOPING.md +++ b/DEVELOPING.md @@ -16,7 +16,7 @@ limitations under the License. # Developing Sparsify -Sparsify is developed and tested using Python 3.7-3.9. +Sparsify is developed and tested using Python 3.8-3.9. To develop Sparsify, you will also need the development dependencies and to follow the styling guidelines. Here's some details to get started. diff --git a/MANIFEST.in b/MANIFEST.in index aafd306d..daded415 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -1,2 +1,4 @@ recursive-include src/sparsify/ui/ * include LICENSE +include src/sparsify/auto/tasks/deployment_instructions.md +include src/sparsify/auto/samples/finetune_llmfoundry_sample.yaml diff --git a/README.md b/README.md index 992fc58e..b9485b67 100644 --- a/README.md +++ b/README.md @@ -31,246 +31,359 @@ See the License for the specific language governing permissions and limitations under the License. --> -

-tool icon  Sparsify [Alpha]
+Sparsify tool icon  Sparsify [Alpha]

-ML model optimization product to accelerate inference.
+ML model optimization product to accelerate inference

[README header badges: Documentation, Main, GitHub release, Stability, GitHub, Contributor Covenant]

-![Logo](https://drive.google.com/uc?id=1XnlBKpRQdsnLC4IPoiCoihXJNFh8y7OL) +**🚨 July 2023: Sparsify's next generation is now in alpha as of version 1.6.0!** -## Overview +Sparsify enables you to accelerate inference without sacrificing accuracy by applying state-of-the-art pruning, quantization, and distillation algorithms to neural networks with a simple web application and one-command API calls. -Sparsify enables you to accelerate inference without sacrificing accuracy by applying state-of-the-art pruning, quantization, and distillation algorithms to Neural Networks with a simple web app and one-command API calls. +Sparsify empowers you to compress models through two components: +- **[Sparsify Cloud](https://apps.neuralmagic.com/sparsify/)** - a web application that allows you to create and manage Sparsify Experiments, explore hyperparameters, predict performance, and compare results across both Experiments and deployment scenarios. +- **Sparsify CLI/API** - a Python package and GitHub repository that allows you to run Sparsify Experiments locally, sync with the Sparsify Cloud, and integrate them into your workflows. -To empower you in compressing models, Sparsify is made up of two components: the Sparsify Cloud and the Sparsify CLI/API. -The Sparsify Cloud is a web application that allows you to create and manage Sparsify Experiments, explore hyperparameters, predict performance, and compare results across both Experiments and deployment scenarios. -The Sparsify CLI/API is a Python package that allows you to run Sparsify Experiments locally, sync with the Sparsify Cloud, and integrate into your own workflows. +## Table of Contents +- [Quickstart Guide](#quickstart-guide) + - [1. Install and Setup](#1-install-and-setup) + - [2. Run an Experiment](#2-run-an-experiment) + - [3. Compare Results](#3-compare-results) + - [4. Deploy a Model](#4-deploy-a-model) +- [Companion Guides](#companion-guides) +- [Resources](#resources) -To get started immediately, [create an account](https://account.neuralmagic.com/signup) and then check out the [Installation](https://github.com/neuralmagic/sparsify/edit/Sparsify-Alpha-README/README.md#installation) and [Quick Start](https://github.com/neuralmagic/sparsify/edit/Sparsify-Alpha-README/README.md#quick-start) sections of this README. -With all of that setup, sparsifying your models is as easy as: +## Quickstart Guide -```bash -sparsify.run sparse-transfer --use-case image-classification --data imagenette --optim-level 50 --train-kwargs '{"dataset": "imagenette"}' -``` - -
- -*🚨**Note**🚨: Sparsify is currently an alpha release, so you have the opportunity to influence the development process for the product. -You can report UI issues and CLI errors, submit bug reports, and provide general feedback about the product to the team via the [nm-sparsify Slack Channel](https://join.slack.com/t/discuss-neuralmagic/shared_invite/zt-1xkdlzwv9-2rvS6yQcCs7VDNUcWxctnw), [email](mailto::rob@neuralmagic.com) or via [GitHub Issues](https://github.com/neuralmagic/sparsify/issues). -As an alpha release, limited support is provided through the community with GitHub Issues and Slack, APIs and UIs are subject to change, and the product is not yet ready for production use. -Thank you for your interest and support!* + +Interested in test-driving our alpha? +Get a sneak peek and influence the product's development process. +Thank you in advance for your feedback and interest! + -## Installation - -`pip` is the preferred method for installing Sparsify. -It is advised to create a fresh [virtual environment](https://docs.python.org/3/library/venv.html) to avoid dependency issues. - -Install with pip using: +This quickstart details several pathways you can work through. +We encourage you to explore one for Sparsify's full benefits. +When you finish the quickstart, sparsifying your models is as easy as: ```bash -pip install sparsify +sparsify.run sparse-transfer --use-case image-classification --data imagenette --optim-level 0.5 ``` -### Hardware and Software Requirements +### 1. Install and Setup -Sparsify is tested on Python 3.8 and 3.10, ONNX 1.5.0-1.12.0, ONNX opset version 11+, and manylinux compliant systems. +#### 1.1 Verify Prerequisites -Sparsify requires a GPU with CUDA + CuDNN in order to sparsify neural networks. -We recommend you use a Linux system with a GPU that has a minimum of 16 GB of GPU Memory, 128 GB of RAM, 4 cores, and is CUDA-enabled. If you are sparsifying a very large model, you may need more RAM than the recommended 128 GB. -If you encounter issues setting up your training environment, file a GitHub issue [here]( https://github.com/neuralmagic/sparsify/issues). +First, verify that you have the correct software and hardware to run the Sparsify Alpha. -## Quick Start +
+Software -We'll show you how to: +Sparsify is tested on Python 3.8 and 3.10, ONNX 1.5.0-1.12.0, ONNX opset version 11+, and manylinux compliant systems. +Sparsify is not supported natively on Windows and MAC OS. -1. Create a Neural Magic Account. -2. Install Sparsify in your local training environment. -3. Login utilizing your API key. -4. Run an Experiment. -5. Compare the Experiment results. +Additionally, for installation from PyPi, pip 20.3+ is required. +
-### Create a Neural Magic Account +
+Hardware -Creating a new account is simple and free. -An account is required to manage your Experiments and API keys. -Visit the [Neural Magic's Web App Platform](https://account.neuralmagic.com/signup) and create an account by entering your email, name, and a unique password. +Sparsify requires a GPU with CUDA + CuDNN in order to sparsify neural networks. +We recommend you use a Linux system with a GPU that has a minimum of 16GB of GPU Memory, 128GB of RAM, 4 CPU cores, and is CUDA-enabled. +If you are sparsifying a very large model, you may need more RAM than the recommended 128GB. +If you encounter issues setting up your training environment, [file a GitHub issue](https://github.com/neuralmagic/sparsify/issues). +
+ +#### 1.2 Create an Account + +Creating a new one-time account is simple and free. +An account is required to manage your Experiments and API keys. +Visit the [Neural Magic's Web App Platform](https://account.neuralmagic.com/signup) and create an account by entering your email, name, and unique password. If you already have a Neural Magic Account, [sign in](https://account.neuralmagic.com/signin) with your email. -For more details, see the [Sparsify Cloud User Guide](https://github.com/neuralmagic/sparsify/docs/cloud-user-guide.md). +Sparsify Sign In -### Install Sparsify +#### 1.3 Install Sparsify -Next, you'll need to install Sparsify on your training hardware. -To do this, run the following command: +`pip` is the preferred method for installing Sparsify. +It is advised to create a [fresh virtual environment](https://docs.python.org/3/library/venv.html) to avoid dependency issues. +Install with pip using: ```bash -pip install sparsify +pip install sparsify-nightly ``` -For more details and system/hardware requirements, see the [Installation](#Installation) section. +#### 1.4 Log in via CLI -### Login to Sparsify - -With Sparsify installed on your training hardware, you'll need to authorize the local CLI to access your account. -This is done by running the `sparsify.login` command and providing your API key. -Locate your API key on the home page of the [Sparsify Cloud](https://apps.neuralmagic.com/sparsify) under the **'Get set up'** modal. -Once you have located this, copy the command or the API key itself and run the following command: +Next, with Sparsify installed on your training hardware: +1. Authorize the local CLI to access your account by running the sparsify.login command and providing your API key. +2. Locate your API key on the homepage of the [Sparsify Cloud](https://apps.neuralmagic.com/sparsify) under the 'Get set up' modal, and copy the command or the API key itself. +3. Run the following command: ```bash sparsify.login API_KEY -```` - -For more details on locating the API_KEY, see the [Sparsify Cloud User Guide](https://github.com/neuralmagic/sparsify/docs/cloud-user-guide.md). - -For more details on the `sparsify.login` command, see the [CLI/API Guide](https://github.com/neuralmagic/sparsify/docs/cli-api-guide.md). - -### Run an Experiment - -Experiments are the core of sparsifying a model. -They are the process of applying sparsification algorithms in One-Shot, Training-Aware, or Sparse-Transfer to a dataset and model. -All Experiments are run locally on your training hardware and can be synced with the cloud for further analysis and comparison. -To run an Experiment, you can use either the CLI or the API depending on your use case. -The Sparsify Cloud provides a UI for exploring hyperparameters, predicting performance, and generating the desired CLI/API command. -For more info on generating commands from the Sparsify Cloud, see the [Sparsify Cloud User Guide](https://github.com/neuralmagic/sparsify/docs/cloud-user-guide.md). +``` -The general command for running an Experiment is: +### 2. Run an Experiment -```bash -sparsify.run EXPERIMENT_TYPE --use-case USE_CASE --model MODEL --data DATA --optim-level OPTIM_LEVEL -``` +Experiments are the core of sparsifying a model. 
+They allow you to apply sparsification algorithms to a dataset and model through the three Experiment types detailed below: +- [One-Shot](#21-one-shot) +- [Training-Aware](#22-training-aware) +- [Sparse-Transfer](#23-sparse-transfer) -Where the values for each of the arguments follow these general rules: -- EXPERIMENT_TYPE: one of `one-shot`, `training-aware`, or `sparse-transfer`; see the examples below for more details or the [CLI/API Guide](https://github.com/neuralmagic/sparsify/docs/cli-api-guide.md). -- USE_CASE: the use case you're solving for such as `image-classification`, `object-detection`, `text-classification`, a custom use case, etc. A full list of supported use cases for each Experiment type can be found [here](https://github.com/neuralmagic/sparsify/docs/use-cases-guide.md). -- MODEL: the model you want to sparsify which can be a model name such as `resnet50`, a stub from the [SparseZoo](https://sparsezoo.neuralmagic.com), or a path to a local model. For One-Shot, currently the model must be in an ONNX format. For Training-Aware and Sparse-Transfer, the model must be in a PyTorch format. More details on model formats can be found [here](https://github.com/neuralmagic/sparsify/docs/models-guide.md). -- DATA: the dataset you want to use to the sparsify the model. This can be a dataset name such as `imagenette` or a path to a local dataset. Currently, One-Shot only supports NPZ formatted datasets. Training-Aware and Sparse-Transfer support PyTorch ImageFolder datasets for image classification, YOLOv5/v8 datasets for object detection and segmentation, and HuggingFace datasets for NLP/NLG. More details on dataset formats can be found [here](https://github.com/neuralmagic/sparsify/docs/datasets-guide.md). -- OPTIM_LEVEL: the desired sparsification level from 0 (none) to 1 (max). The general rule is that 0 is the baseline model, <0.3 only quantizes the model, 0.3-1.0 increases the sparsity of the model and applies quantization. More details on sparsification levels can be found [here](https://github.com/neuralmagic/sparsify/docs/optim-levels-guide.md). +All Experiments are run locally on your training hardware and can be synced with the cloud for further analysis and comparison, using Sparsify's two components: +- Sparsify Cloud - explore hyperparameters, predict performance, and generate the desired CLI/API command. +- Sparsify CLI/API - run an experiment. -#### Running One-Shot +#### 2.1 One-Shot | Sparsity | Sparsification Speed | Accuracy | |----------|----------------------|----------| | **++** | **+++++** | **+++** | -One-Shot Experiments are the quickest way to create a faster and smaller version of your model. -The algorithms are applied to the model post training utilizing a calibration dataset, so they result in no further training time and much faster sparsification times compared with Training-Aware Experiments. +One-Shot Experiments quickly sparsify your model post-training, providing a 3-5x speedup with minimal accuracy loss, ideal for quick model optimization without retraining your model. -Generally, One-Shot Experiments result in a 3-5x speedup with minimal accuracy loss. -They are ideal for when you want to quickly sparsify your model and don't have a lot of time to spend on the sparsification process. 
+To run a One-Shot Experiment for your model, dataset, and use case, use the following command: +```bash +sparsify.run one-shot --use-case USE_CASE --model MODEL --data DATASET --optim-level OPTIM_LEVEL +``` -CV Example: +For example, to sparsify a ResNet-50 model on the ImageNet dataset for image classification, run the following commands: ```bash -sparsify.run one-shot --use-case image_classification --model resnet50 --data imagenette --optim-level 0.5 +wget https://public.neuralmagic.com/datasets/cv/classification/imagenet_calibration.tar.gz +tar -xzf imagenet_calibration.tar.gz +sparsify.run one-shot --use-case image_classification --model "zoo:cv/classification/resnet_v1-50/pytorch/sparseml/imagenet/base-none" --data ./imagenet_calibration --optim-level 0.5 ``` -NLP Example: +Or, to sparsify a BERT model on the SST2 dataset for sentiment analysis, run the following commands: ```bash -sparsify.run one-shot --use-case text_classification --model bert-base --data sst2 --optim-level 0.5 +wget https://public.neuralmagic.com/datasets/nlp/text_classification/sst2_calibration.tar.gz +tar -xzf sst2_calibration.tar.gz +sparsify.run one-shot --use-case text_classification --model "zoo:nlp/sentiment_analysis/bert-base/pytorch/huggingface/sst2/base-none" --data ./sst2_calibration --optim-level 0.5 ``` -#### Running Sparse-Transfer +To dive deeper into One-Shot Experiments, read through the [One-Shot Experiment Guide](https://github.com/neuralmagic/sparsify/blob/main/docs/one-shot-experiment-guide.md). + + +Note, One-Shot Experiments currently require the model to be in an ONNX format and the dataset to be in a NumPy format. +More details are provided in the One-Shot Experiment Guide. + + +#### 2.2 Sparse-Transfer | Sparsity | Sparsification Speed | Accuracy | |----------|----------------------|-----------| | **++++** | **++++** | **+++++** | -Sparse-Transfer Experiments are the second quickest way to create a faster and smaller model for your dataset. -Sparse, foundational models are sparsified in a Training-Aware manner on a large dataset such as ImageNet. -Then, the sparse patterns are transferred to your dataset through a fine-tuning process. +Sparse-Transfer Experiments quickly create a smaller and faster model for your dataset by transferring from a [SparseZoo](https://sparsezoo.neuralmagic.com/) pre-sparsified foundational model, providing a 5-10x speedup with minimal accuracy loss, ideal for quick model optimization without retraining your model. -Generally, Sparse-Transfer Experiments result in a 5-10x speedup with minimal accuracy loss. -They are ideal when a sparse model already exists for your use case, and you want to quickly utilize it for your dataset. -Note, the model argument is optional for Sparse-Transfer Experiments as Sparsify will select the best one from the SparseZoo for your use case if not supplied. 
+To run a Sparse-Transfer Experiment for your model (optional), dataset, and use case, run the following command: +```bash +sparsify.run sparse-transfer --use-case USE_CASE --model OPTIONAL_MODEL --data DATASET --optim-level OPTIM_LEVEL +``` -CV Example: +For example, to sparse transfer a SparseZoo model to the Imagenette dataset for image classification, run the following command: ```bash sparsify.run sparse-transfer --use-case image_classification --data imagenette --optim-level 0.5 ``` -NLP Example: +Or, to sparse transfer a SparseZoo model to the SST2 dataset for sentiment analysis, run the following command: ```bash sparsify.run sparse-transfer --use-case text_classification --data sst2 --optim-level 0.5 ``` -#### Running Training-Aware +To dive deeper into Sparse-Transfer Experiments, read through the [Sparse-Transfer Experiment Guide](https://github.com/neuralmagic/sparsify/blob/main/docs/sparse-transfer-experiment-guide.md). + + +Note, Sparse-Transfer Experiments require the model to be saved in a PyTorch format corresponding to the underlying integration such as Ultralytics YOLOv5 or Hugging Face Transformers. +Datasets must additionally match the expected format of the underlying integration. +More details and exact formats are provided in the Sparse-Transfer Experiment Guide. + + +#### 2.3 Training-Aware | Sparsity | Sparsification Speed | Accuracy | |-----------|-----------------------|-----------| | **+++++** | **++** | **+++++** | -Training-Aware Experiments are the most accurate way to create a faster and smaller model for your dataset. -The algorithms are applied to the model during training, so they offer the best possible recovery of accuracy. -However, they do require additional training time and hyperparameter tuning to achieve the best results. +Training-aware Experiments sparsify your model during training, providing a 6-12x speedup with minimal accuracy loss, ideal for thorough model optimization when the best performance and accuracy are required. -Generally, Training-Aware Experiments result in a 6-12x speedup with minimal accuracy loss. -They are ideal when you have the time to train a model, have a custom model, or want to achieve the best possible accuracy. -Note, the model argument is optional for Sparse-Transfer Experiments as Sparsify will select the best one from the SparseZoo for your use case if not supplied. 
+To run a Training-Aware Experiment for your model, dataset, and use case, run the following command: +```bash +sparsify.run training-aware --use-case USE_CASE --model OPTIONAL_MODEL --data DATASET --optim-level OPTIM_LEVEL +``` -CV Example: +For example, to sparsify a ResNet-50 model on the Imagenette dataset for image classification, run the following command: ```bash -sparsify.run training-aware --use-case image_classification --model resnet50 --data imagenette --optim-level 0.5 +sparsify.run training-aware --use-case image_classification --model "zoo:cv/classification/resnet_v1-50/pytorch/sparseml/imagenette/base-none" --data imagenette --optim-level 0.5 ``` -NLP Example: +Or, to sparsify a BERT model on the SST2 dataset for sentiment analysis, run the following command: ```bash -sparsify.run training-aware --use-case text_classification --model bert-base --data sst2 --optim-level 0.5 +sparsify.run training-aware --use-case text_classification --model "zoo:nlp/sentiment_analysis/bert-base/pytorch/huggingface/sst2/base-none" --data sst2 --optim-level 0.5 ``` -### Compare the Results +To dive deeper into Training-Aware Experiments, read through the [Training-Aware Experiment Guide](https://github.com/neuralmagic/sparsify/blob/main/docs/training-aware-experiment-guide.md). + + +Note that Training-Aware Experiments require the model to be saved in a PyTorch format corresponding to the underlying integration such as Ultralytics YOLOv5 or Hugging Face Transformers. +Datasets must additionally match the expected format of the underlying integration. +More details and exact formats are provided in the Training-Aware Experiment Guide. + + +### 3. Compare Results + +Once you have run your Experiment, the results, logs, and deployment files will be saved under the current working directory in the following format: +```text +[EXPERIMENT_TYPE]_[USE_CASE]_{DATE_TIME} +├── deployment +│ ├── model.onnx +│ └── *supporting files* +├── logs +│ ├── *logs* +├── training_artifacts +│ ├── *training artifacts* + ├── *metrics and results* +``` + +You can compare the accuracy by looking through the metrics printed out to the console and the metrics saved in the experiment directory. +Additionally, you can use [DeepSparse](https://github.com/neuralmagic/deepsparse) to compare the inference performance on your CPU deployment hardware. -Once you have run your Experiment, you can compare the results printed out to the console. -In the near future, you will be able to compare the results in the Cloud, measure other scenarios, and compare the results to other Experiments. + +Note: In the near future, you will be able to visualize the results in Sparsify Cloud, simulate other scenarios and hyperparameters, compare the results to other Experiments, and package for your deployment scenario. + -The results will look something like this: +To run a benchmark on your deployment hardware, use the `deepsparse.benchmark` command with your original model and the new optimized model. +This will run a number of inferences to simulate a real-world scenario and print out the results. + +It's as simple as running the following command: ```bash -Sparsify Results: -TODO +deepsparse.benchmark --model MODEL --scenario SCENARIO ``` -### Package for Deployment +For example, to benchmark a dense ResNet-50 model, run the following command: +```bash +deepsparse.benchmark --model "zoo:cv/classification/resnet_v1-50/pytorch/sparseml/imagenette/base-none" --scenario sync +``` -Landing soon! 
+This can then be compared to the sparsified ResNet-50 model with the following command: +```bash +deepsparse.benchmark --model "zoo:cv/classification/resnet_v1-50/pytorch/sparseml/imagenet/pruned95_quant-none" --scenario sync +``` + +The output will look similar to the following: +```text +DeepSparse, Copyright 2021-present / Neuralmagic, Inc. version: 1.6.0.20230629 COMMUNITY | (fc8b788a) (release) (optimized) (system=avx512, binary=avx512) +deepsparse.benchmark.benchmark_model INFO deepsparse.engine.Engine: + onnx_file_path: ./model.onnx + batch_size: 1 + num_cores: 1 + num_streams: 1 + scheduler: Scheduler.default + fraction_of_supported_ops: 0.9981 + cpu_avx_type: avx512 + cpu_vnni: False +=Original Model Path: ./model.onnx +Batch Size: 1 +Scenario: sync +Throughput (items/sec): 134.5611 +Latency Mean (ms/batch): 7.4217 +Latency Median (ms/batch): 7.4245 +Latency Std (ms/batch): 0.0264 +Iterations: 1346 +``` + +See the [DeepSparse Benchmarking User Guide](https://github.com/neuralmagic/deepsparse/blob/main/docs/user-guide/deepsparse-benchmarking.md) for more information on benchmarking. + +### 4. Deploy a Model + +As an optional step to this quickstart, now that you have your optimized model, you are ready for inferencing. +To get the most inference performance out of your optimized model, we recommend you deploy on Neural Magic's [DeepSparse](https://docs.neuralmagic.com/deepsparse). +DeepSparse is built to get the best performance out of optimized models on CPUs. + +DeepSparse Server takes in a task and a model path and will enable you to serve models and `Pipelines` for deployment in HTTP. + +You can deploy any ONNX model using DeepSparse Server with the following command: +```bash +deepsparse.server --task USE_CASE --model_path MODEL_PATH +``` + +Where `USE_CASE` is the use case of your Experiment and `MODEL_PATH` is the path to the deployment folder from the Experiment. + +For example, to deploy a sparsified ResNet-50 model, run the following command: +```bash +deepsparse.server --task image_classification --model_path "zoo:cv/classification/resnet_v1-50/pytorch/sparseml/imagenet/pruned95_quant-none" +``` + +If you're not ready for deploying, congratulations on completing the quickstart! + +## Companion Guides + +- [Sparsify Cloud User Guide](https://github.com/neuralmagic/sparsify/blob/main/docs/cloud-user-guide.md) +- [Sparsify Datasets Guide](https://github.com/neuralmagic/sparsify/blob/main/docs/datasets-guide.md) +- [Sparsify Models Guide](https://github.com/neuralmagic/sparsify/blob/main/docs/models-guide.md) +- [One-Shot Experiments Guide](https://github.com/neuralmagic/sparsify/blob/main/docs/one-shot-experiment-guide.md) +- [Sparse-Transfer Experiments Guide](https://github.com/neuralmagic/sparsify/blob/main/docs/sparse-transfer-experiment-guide.md) +- [Training-Aware Experiments Guide](https://github.com/neuralmagic/sparsify/blob/main/docs/training-aware-experiment-guide.md) ## Resources +Now that you have explored Sparsify [Alpha], here are other related resources. + +### Feedback and Support + +Report UI issues and CLI errors, submit bug reports, and provide general feedback about the product to the Sparsify team via the [nm-sparsify Slack Channel](https://join.slack.com/t/discuss-neuralmagic/shared_invite/zt-1xkdlzwv9-2rvS6yQcCs7VDNUcWxctnw), or via [GitHub Issues](https://github.com/neuralmagic/sparsify/issues). Alpha support is provided through those channels. 
+ +### Terms and Conditions + +Sparsify Alpha is a pre-release version of Sparsify that is still in active development. +The product is not yet ready for production use; APIs and UIs are subject to change. +There may be bugs in the Alpha version, which we hope to have fixed before Beta and then a general Q3 2023 release. +The feedback you provide on quality and usability helps us identify issues, fix them, and make Sparsify even better. +This information is used internally by Neural Magic solely for that purpose. +It is not shared or used in any other way. + +That being said, we are excited to share this release and hear what you think. +Thank you in advance for your feedback and interest! + ### Learning More -- Documentation: [SparseML,](https://docs.neuralmagic.com/sparseml/) [SparseZoo,](https://docs.neuralmagic.com/sparsezoo/) [Sparsify,](https://docs.neuralmagic.com/archive/sparsify/) [DeepSparse](https://docs.neuralmagic.com/deepsparse/) +- Documentation: [SparseML](https://docs.neuralmagic.com/sparseml/), [SparseZoo](https://docs.neuralmagic.com/sparsezoo/), [Sparsify](https://docs.neuralmagic.com/sparsify/), [DeepSparse](https://docs.neuralmagic.com/deepsparse/) - Neural Magic: [Blog,](https://www.neuralmagic.com/blog/) [Resources](https://www.neuralmagic.com/resources/) ### Release History @@ -302,7 +415,8 @@ For more general questions about Neural Magic, please fill out this [form.](http ### Cite -Find this project useful in your research or other communications? Please consider citing: +Find this project useful in your research or other communications? +Please consider citing: ```bibtex @InProceedings{ diff --git a/docker/Dockerfile b/docker/Dockerfile index b68fef09..6e0ed958 100644 --- a/docker/Dockerfile +++ b/docker/Dockerfile @@ -1,25 +1,15 @@ -# Setup the base image -FROM python:3.8-slim-bullseye +FROM nvidia/cuda:11.8.0-cudnn8-devel-ubuntu20.04 -# Install git -RUN : \ - && apt-get update \ - && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends git \ - && apt-get clean \ - && rm -rf /var/lib/apt/lists/* +ARG DEBIAN_FRONTEND=noninteractive -# Activate venv -RUN python3.8 -m venv /venv -ENV PATH="venv/bin:$PATH" +RUN apt-get update && apt-get install --no-install-recommends -y \ + git python3 python3-dev python3-venv python3-pip python3-wheel build-essential && \ + apt-get clean && rm -rf /var/lib/apt/lists/* -RUN pip3 install --upgrade setuptools wheel -# Setup DeepSparse +ARG REF=main +RUN git clone https://github.com/neuralmagic/sparsify && cd sparsify && git checkout $REF +RUN python3 -m pip install --upgrade pip && \ + python3 -m pip install --no-cache-dir -e ./sparsify -ARG GIT_CHECKOUT -# if $GIT_CHECKOUT is not specified - just install from pypi -RUN if [ -z "${GIT_CHECKOUT}" ] ; then pip3 install --no-cache-dir --upgrade deepsparse[server] ; fi - -# if $GIT_CHECKOUT is specified - clone, checkout $GIT_CHECKOUT, and install with -e -RUN if [ -n "${GIT_CHECKOUT}" ] ; then git clone https://github.com/neuralmagic/deepsparse.git --depth 1 -b $GIT_CHECKOUT; fi -RUN if [ -n "${GIT_CHECKOUT}" ] ; then pip3 install --no-cache-dir --upgrade -e "./deepsparse[server]" ; fi +CMD ["/bin/bash"] diff --git a/docs/cli-api-guide.md b/docs/cli-api-guide.md deleted file mode 100644 index fa9bb319..00000000 --- a/docs/cli-api-guide.md +++ /dev/null @@ -1,18 +0,0 @@ - - -# Sparsify CLI/API Guide -Landing Soon! 
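As a quick local check of the updated `docker/Dockerfile` above, the image can be built and run along these lines; the tag name and `REF` value here are only illustrative:

```bash
# Build from the docker/ context, pinning the Sparsify git ref to install
docker build ./docker --build-arg REF=main -t sparsify:local

# Start an interactive shell with GPU access (requires the NVIDIA Container Toolkit)
docker run --gpus all -it sparsify:local
```

This mirrors what the release and nightly workflows do, with `REF` pointing at the tag or branch being published.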
diff --git a/docs/cloud-user-guide.md b/docs/cloud-user-guide.md index b5b16501..72e44f70 100644 --- a/docs/cloud-user-guide.md +++ b/docs/cloud-user-guide.md @@ -22,79 +22,80 @@ The Sparsify Cloud is a web application that allows you to create and manage Spa In this Sparsify Cloud User Guide, we will show you how to: 1. Create a Neural Magic Account. 2. Install Sparsify in your local training environment. -3. Login utilizing your API key. -4. Run an Experiment +3. Log in using your API key. +4. Run an Experiment. 5. Compare the Experiment results. -## Create a Neural Magic Account +## Creating a Neural Magic Account Creating a new account is simple and free. An account is required to manage your Experiments and API keys. -Visit the [Neural Magic's Web App Platform](https://account.neuralmagic.com/signup) and create an account by entering your email, name, and a unique password. +Visit the [Neural Magic's Web App Platform](https://account.neuralmagic.com/signup) and create an account by entering your email, name, and unique password. If you already have a Neural Magic Account, [sign in](https://account.neuralmagic.com/signin) with your email. [![SignIn](https://drive.google.com/uc?id=1RInSrLsfm0PQLEkjJqD1HzaCWA2yDcNi)](https://drive.google.com/uc?id=1RInSrLsfm0PQLEkjJqD1HzaCWA2yDcNi) -## Install Sparsify in your local training environment +## Installing Sparsify in Your Local Training Environment -Next, you'll need to install Sparsify on your training hardware. -To do this, run the following command: +Next, install Sparsify on your training hardware by running the following command: ```bash -pip install sparsify +pip install sparsify-nightly ``` -For more details and system/hardware requirements, see the [Installation](https://github.com/neuralmagic/sparsify/README.md#installation) section. +For more details and system/hardware requirements, see the [Installation](https://github.com/neuralmagic/sparsify/blob/main/README.md#installation) section. -You may copy the command from the Sparsify Cloud in step 1 and run that in your training environment to install Sparsify. +You may copy the command from the Sparsify Cloud in Step 1 in the following screenshot and run that in your training environment to install Sparsify. [![Homepage](https://drive.google.com/uc?id=10U3r7lr4fmdKLG_xzRys2avdf2g2GVIN)](https://drive.google.com/uc?id=10U3r7lr4fmdKLG_xzRys2avdf2g2GVIN) -## Login utilizing your API key +## Log in Utilizing Your API key With Sparsify installed on your training hardware, you'll need to authorize the local CLI to access your account. This is done by running the `sparsify.login` command and providing your API key. -Locate your API key on the home page of the [Sparsify Cloud](https://apps.neuralmagic.com/sparsify/) under the **'Get set up'** modal. +Locate your API key on the homepage of the [Sparsify Cloud](https://apps.neuralmagic.com/sparsify/) under the **'Get set up'** modal. Once you have located this, copy the command or the API key itself and run the following command: ```bash sparsify.login API_KEY ```` -You may copy the command from the Sparsify Cloud in step 2 and run that in your training environment after installing Sparsify to log in via the Sparsify CLI. For more details on the `sparsify.login` command, see the [CLI/API Guide](https://github.com/neuralmagic/sparsify/docs/cli-api-guide.md). +You may copy the command from the Sparsify Cloud in Step 2 and run that in your training environment after installing Sparsify to log in via the Sparsify CLI. 
For more details on the `sparsify.login` command, see the [CLI/API Guide](https://github.com/neuralmagic/sparsify/blob/main/docs/cli-api-guide.md). -## Run an Experiment +## Running an Experiment Experiments are the core of sparsifying a model. They are the process of applying sparsification algorithms in One-Shot, Training-Aware, or Sparse-Transfer to a dataset and model. -All Experiments are run locally on your training hardware and can be synced with the cloud for further analysis and comparison. +All Experiments are run locally on your training hardware and can be synced with Sparsify Cloud for further analysis and comparison. -To run an Experiment, use the Sparsify Cloud to generate a code command to run in your training environment.: +To run an Experiment, use the Sparsify Cloud to generate a code command to run in your training environment: 1. Click on 'Start Sparsifyng' in the top right corner of the Sparsify Cloud Homepage to bring up the ```Sparsify a model``` modal. ![Sparsify a model](https://drive.google.com/uc?id=1FyayVSqq5YtKO_dEgt5iMNSZQNsqaQFq) -3. Select a Use Case for your model. Note that if your use case is not present in the dropdown, fear not; the use case does not affect the optimization of the model. -4. Choose the Experiment Type. To learn more about the Experiments, see the [Sparsify README](https://github.com/neuralmagic/sparsify/README.md#run-an-experiment). -5. Adjust the Hyperparameter Compression slider to designate whether you would like to to optimize the model for performance, accuracy, or a balance of both. Note that selecting an extreme on the slider will not completely tank the opposing metric. -6. Click 'Generate Code Snippet' to view the code snipppet generated from your sparsification selections on the next modal. +3. Select a use case for your model. Note that if your use case is not present in the dropdown, fear not; the use case does not affect the optimization of the model. +4. Choose the Experiment Type. To learn more about the Experiments, see the [Sparsify README](https://github.com/neuralmagic/sparsify/blob/main/README.md#run-an-experiment). +5. Adjust the Hyperparameter Compression slider to designate whether you would like to optimize the model for performance, accuracy, or a balance of both. Note that selecting an extreme on the slider will not completely tank the opposing metric. +6. Click 'Generate Code Snippet' to view the code snippet generated from your sparsification selections on the next modal. ![Generate Code Snippetl](https://drive.google.com/uc?id=14B193hHeYqLeSX8r6C5N1G8beBeXUkYE) - 7. Once your code snippet is generated, make sure you have installed Sparsify and are logged in via the CLI. 8. Copy the code snippet and fill in the paths to your local dense model and/or training dataset as prompted. 9. Run the command and wait for your sparse model to complete. You have now completed running an Experiment with Sparsify. ![Generate Code Snippetl](https://drive.google.com/uc?id=1xWrla3ps0qeS70P1bzOIYGeIPXWgHfF_) -To learn more about the arguments for the `sparsify.run` command, see the [CLI/API Guide](https://github.com/neuralmagic/sparsify/docs/cli-api-guide.md). +To learn more about the arguments for the `sparsify.run` command, see the [CLI/API Guide](https://github.com/neuralmagic/sparsify/blob/main/docs/cli-api-guide.md). + +## Comparing the Experiment Results +Once you have run your Experiment, you can compare the results printed out to the console using the `deepsparse.benchmark` command. 
+In the near future, you will be able to compare the results in Sparsify Cloud, measure other scenarios, and compare the results to other Experiments. -## Compare the Experiment results To compare the results of your Experiment with the original dense baseline model, you can use the `deepsparse.benchmark` command with your original model and the new optimized model on your deployment hardware. Models that have been optimized using Sparsify will generally run performantly on DeepSparse, Neural Magic's sparsity-aware CPU inference runtime. @@ -108,5 +109,33 @@ deepsparse.benchmark zoo:nlp/sentiment_analysis/obert-base/pytorch/huggingface/s ``` -*Note: performance improvement is not guaranteed across all runtimes and hardware types.* +The results will look something like this: +```bash +2023-06-30 15:20:41 deepsparse.benchmark.benchmark_model INFO Thread pinning to cores enabled +downloading...: 100%|████████████████████████| 105M/105M [00:18<00:00, 5.81MB/s] +DeepSparse, Copyright 2021-present / Neuralmagic, Inc. version: 1.6.0.20230629 COMMUNITY | (fc8b788a) (release) (optimized) (system=avx512, binary=avx512) +[7ffba5a84700 >WARN< operator() ./src/include/wand/utility/warnings.hpp:14] Generating emulated code for quantized (INT8) operations since no VNNI instructions were detected. Set NM_FAST_VNNI_EMULATION=1 to increase performance at the expense of accuracy. +2023-06-30 15:21:13 deepsparse.benchmark.benchmark_model INFO deepsparse.engine.Engine: + onnx_file_path: /home/rahul/.cache/sparsezoo/neuralmagic/obert-base-sst2_wikipedia_bookcorpus-pruned90_quantized/model.onnx + batch_size: 1 + num_cores: 10 + num_streams: 1 + scheduler: Scheduler.default + fraction_of_supported_ops: 0.9981 + cpu_avx_type: avx512 + cpu_vnni: False +2023-06-30 15:21:13 deepsparse.utils.onnx INFO Generating input 'input_ids', type = int64, shape = [1, 128] +2023-06-30 15:21:13 deepsparse.utils.onnx INFO Generating input 'attention_mask', type = int64, shape = [1, 128] +2023-06-30 15:21:13 deepsparse.utils.onnx INFO Generating input 'token_type_ids', type = int64, shape = [1, 128] +2023-06-30 15:21:13 deepsparse.benchmark.benchmark_model INFO Starting 'singlestream' performance measurements for 10 seconds +Original Model Path: zoo:nlp/sentiment_analysis/obert-base/pytorch/huggingface/sst2/pruned90_quant-none +Batch Size: 1 +Scenario: sync +Throughput (items/sec): 134.5611 +Latency Mean (ms/batch): 7.4217 +Latency Median (ms/batch): 7.4245 +Latency Std (ms/batch): 0.0264 +Iterations: 1346 +``` +*Note: performance improvement is not guaranteed across all runtimes and hardware types.* diff --git a/docs/datasets-guide.md b/docs/datasets-guide.md index 6c68115e..b95d0b9d 100644 --- a/docs/datasets-guide.md +++ b/docs/datasets-guide.md @@ -17,174 +17,248 @@ limitations under the License. # Sparsify Datasets Guide For all Sparsify Experiments, you will need to provide a dataset to create a sparse model. -Due to the varied ML pipelines and implementations, Sparsify standardizes on a few, popular formats for datasets. +Due to the varied ML pipelines and implementations, Sparsify standardizes on a few popular formats for datasets. You will need to make sure that your data is formatted properly according to the standards listed below. -## Predefined Use Cases +## Table of Contents -### Training Aware and Sparse Transfer +1. [Image Classification](#image-classification) +2. [Object Detection](#object-detection) +3. [Image Segmentation](#image-segmentation) +4. [NLP](#nlp) +5. [NPZ](#npz) +6. 
[Custom](#custom) -Training Aware and Sparse Transfer utilize specific dataset standards depending on the use case. -Each one is listed below with an example. +## Image Classification -#### Image Classification +For image classification tasks, Sparsify relies on the standard `SPLIT/CLASS/IMAGE` format used by the PyTorch ImageFolder class. -For image classification tasks, Sparsify relies on the dataset format standard used by the PyTorch ImageFolder class. -This format is fairly simple and intuitive, and it is also widely used in the machine learning community. - -##### Specifications - -- The root folder should contain subdirectories, each representing a single class of images. +### Specifications +- The root folder should contain `train` and `val` subdirectories, each representing the training and validation splits of the dataset. +- Each split should contain subdirectories, each representing a single class of images. - Images of a particular class/category should be placed inside the corresponding subdirectory. - The subdirectory name is used as the class label and should be unique for each class. - The images should be in a format readable by the Python Imaging Library (PIL), which includes formats such as .jpeg, .png, .bmp, etc. - Images do not need to be of the same size. -The PyTorch ImageFolder class automatically assigns numerical class labels to the images based on the lexicographical order of their class directories. -Therefore, it is crucial to ensure the directories are properly named to avoid any confusion or mislabeling. - -##### Example - -For an image classification task involving dogs and cats, the dataset directory should be structured as follows: +The root directory containing the splits data samples should be passed to the CLI as the `--data` argument. +### Structure +```text +data +├── train +│ ├── class_1 +│ │ ├── image_1.png +│ │ ├── image_2.png +│ │ └── ... +│ ├── class_2 +│ │ ├── image_1.png +│ │ ├── image_2.png +│ │ └── ... +│ └── ... +└── val + ├── class_1 + │ ├── image_1.png + │ ├── image_2.png + │ └── ... + ├── class_2 + │ ├── image_1.png + │ ├── image_2.png + │ └── ... + └── ... ``` -root/dog/xxx.png -root/dog/xxy.png -root/dog/xxz.png -root/cat/123.png -root/cat/nsa.png -root/cat/asd.png -``` +For more details and examples on creating image classification datasets for Sparsify, read the [Sparsify Datasets Guide](https://github.com/neuralmagic/sparsify/blob/main/docs/datasets-guide.md). + +### Example -In this example, all images within the 'dog' subdirectory will be labeled as 'dog', and all images within the 'cat' subdirectory will be labeled as 'cat'. -The exact filenames ('xxx.png', 'xxy.png', etc.) do not matter; what matters is the directory structure and the directory names. -By organizing the data in this way, it can be easily read and labeled by the PyTorch ImageFolder class, and thus easily used for training image classification models in Sparsify. -Please note, the class labels ('dog', 'cat') are case-sensitive and the order of the classes would be sorted lexicographically. -Here, 'cat' will be considered class 0 and 'dog' will be class 1, due to alphabetical order. +## Object Detection -#### Object Detection / Image Segmentation +For object detection tasks, Sparsify utilizes the YOLO format for datasets. +This is the same format used by Ultralytics [YOLOv5/YOLOv8](https://docs.ultralytics.com/datasets/detect/) +The format is made up of a YAML file containing the root dataset location, the classes, and the training and validation split locations. 
-For object detection and image segmentation tasks, Sparsify supports the dataset format used by YOLOv5. -This format is specifically designed for tasks involving bounding boxes and segmentation masks, and is widely adopted in the community. +If a directory is supplied instead and there is no YAML file within the directory, Sparsify will automatically create one for you. +To auto create a YAML file, the directory structure must be the same as listed below in addition to containing a classes.txt file which contains the class names with one per line. -##### Specifications +### Specifications +- The root folder should contain `labels` and `images` subdirectories. +- Underneath both the `labels` and `images` directories, there should be `train` and `val` subdirectories, each representing the training and validation splits of the dataset. +- The split directories under `labels` should contain the YOLO format label files with a single `.txt` file per image. +- The text files underneath the `labels` directories should contain a single line per object of the format `class_index x_center y_center width height` where the coordinates are normalized between 0 and 1 and the class numbers are zero-indexed. +- The split directories under `images` should contain the images of any size in a format readable by the Python Imaging Library (PIL), which includes formats such as .jpeg, .png, .bmp, etc. +- Each image file must have a corresponding label file with the same name in the `labels` directory. +- If supplying a directory without a YAML file, the directory must also contain a `classes.txt` file with one class name per line in the same order as the class numbers in the label files. -- Images should be stored in a common directory, generally named `images`. -- Annotations for the images should be stored in a separate directory, often named `labels`. -- Images can be in formats readable by OpenCV (e.g. .jpg, .png). -- Each image should have a corresponding annotation file. The annotation files should be in plain text format (.txt). -- The name of the annotation file should be the same as the corresponding image file, except with a .txt extension. -- Annotation files for object detection should contain one line for each object in the image. Each line should be in the format: ` `, where the values are normalized relative to the size of the image. -- Annotation files for image segmentation should contain information about the segmentation masks. +### Structure +```text +data +├── images +│ ├── train +│ │ ├── image_1.png +│ │ ├── image_2.png +│ │ └── ... +│ ├── val +│ │ ├── image_1.png +│ │ ├── image_2.png +│ │ └── ... +│ └── ... +├── labels +│ ├── train +│ │ ├── image_1.txt +│ │ ├── image_2.txt +│ │ └── ... +│ ├── val +│ │ ├── image_1.txt +│ │ ├── image_2.txt +│ │ └── ... +│ └── ... +├── classes.txt +└── dataset.yaml +``` -##### Example +For more details and examples on creating object detection datasets for Sparsify, read the [Sparsify Datasets Guide](https://github.com/neuralmagic/sparsify/blob/main/docs/datasets-guide.md). 
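As a sketch of what the auto-created file can look like, a minimal `dataset.yaml` for the layout above might be (the class names here are placeholders):

```yaml
# dataset.yaml -- paths are resolved relative to `path`
path: .               # dataset root directory
train: images/train   # training images
val: images/val       # validation images

# zero-indexed class names, matching the indices used in the label files
names:
  0: person
  1: car
```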
-For an object detection task involving detecting cars and pedestrians, the dataset directory should be structured as follows: +### Example -``` -dataset/ -├── images/ -│ ├── image1.jpg -│ └── image2.jpg -└── labels/ - ├── image1.txt - └── image2.txt -``` -For `image1.jpg`, if there's a car and a pedestrian in the image, the corresponding `image1.txt` file could look like this: +## Image Segmentation -``` -0 0.5 0.6 0.2 0.3 -1 0.7 0.8 0.1 0.2 -``` +For image segmentation tasks, Sparsify utilizes the YOLO format for datasets. +This is the same format used by Ultralytics [YOLOv5/YOLOv8](https://docs.ultralytics.com/datasets/segment/) +The format is made up of a YAML file containing the root dataset location, the classes, and the training and validation split locations. -This would mean that there is an object of class 0 (car) centered at (50% of image width, 60% of image height) and having a width of 20% of the image width and height 30% of the image height. -The second line is similar but for an object of class 1 (pedestrian). +If a directory is supplied instead and there is no YAML file within the directory, Sparsify will automatically create one for you. +To auto create a YAML file, the directory structure must be the same as listed below in addition to containing a classes.txt file which contains the class names with one per line. -For image segmentation, the labels might be more complex, including segmentation masks that indicate which pixels belong to which object category. +### Specifications +- The root folder should contain `annotations` and `images` subdirectories. +- Underneath both the `annotations` and `images` directories, there should be `train` and `val` subdirectories, each representing the training and validation splits of the dataset. +- The split directories under `annotations` should contain the YOLO format annotation files with a single `.txt` file per image. +- The text files underneath the `annotations` directories should contain a single line per object of the format `class_index x_1 y_1 x_2 y_2 x_3 y_3` where the coordinates that bound the object are normalized between 0 and 1 and the class numbers are zero-indexed. +- The split directories under `images` should contain the images of any size in a format readable by the Python Imaging Library (PIL), which includes formats such as .jpeg, .png, .bmp, etc. +- Each image file must have a corresponding annotation file with the same name in the `annotations` directory. +- If supplying a directory without a YAML file, the directory must also contain a `classes.txt` file with one class name per line in the same order as the class numbers in the annotation files. -Make sure the class labels are consistent with what is expected by the YOLOv5 configuration you are using, and that the bounding box coordinates are normalized as described above. +### Structure +```text +data +├── images +│ ├── train +│ │ ├── image_1.png +│ │ ├── image_2.png +│ │ └── ... +│ ├── val +│ │ ├── image_1.png +│ │ ├── image_2.png +│ │ └── ... +│ └── ... +├── annotations +│ ├── train +│ │ ├── image_1.txt +│ │ ├── image_2.txt +│ │ └── ... +│ ├── val +│ │ ├── image_1.txt +│ │ ├── image_2.txt +│ │ └── ... +│ └── ... +├── classes.txt +└── dataset.yaml +``` -#### Natural Language (NLP/NLG) +For more details and examples on creating segmentation datasets for Sparsify, read the [Sparsify Datasets Guide](https://github.com/neuralmagic/sparsify/blob/main/docs/datasets-guide.md). 
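As an illustration of the annotation format described above, a single `image_1.txt` could contain one object per line, a class index followed by normalized polygon vertices (the values below are made up):

```text
0 0.41 0.22 0.63 0.24 0.60 0.55 0.38 0.52
1 0.10 0.70 0.22 0.68 0.20 0.85 0.08 0.88
```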
-For natural language processing (NLP) and natural language generation (NLG) tasks, Sparsify supports the dataset formats used by the Hugging Face library. -Hugging Face datasets can be represented in various file formats including JSON, CSV, and JSON lines format (.jsonl). +### Example -##### Specifications -- Each row or line in your data file should represent a single example. -- The data must include the features necessary for your task. For example, a dataset for text classification might include 'text' and 'label' fields. -- For JSON files, each line should be a separate, self-contained JSON object. -- For CSV files, the first row should include the column names, and each subsequent row should include the fields for a single example. -- The file should be UTF-8 encoded to support a wide range of text inputs. +## NLP -##### Example +For NLP tasks, Sparsify utilizes the HuggingFace [Datasets](https://huggingface.co/docs/datasets/) format and expectations. +Hugging Face datasets can be represented in various file formats, including CSV, and JSON lines format (.jsonl). -Here's an example of how you might structure a dataset for a sentiment analysis task: +Specifications: +- The root folder should contain JSON or CSV files associated with each split of the dataset. +- The JSON or CSV files must be named such that the training data contains the word `train`, validation data contains the word `val`, and any optional test data contains the word `test`. +- For JSON files, each line must be a JSON object representing a single data sample. +- For CSV files, the first row must be a header row containing the column names. +- The label column must be named `label`. +- The features column will be dynamically determined based on the column names and the rules below + - If both `setence1` and `sentence2` are present, these columns will be taken as the features. + - Otherwise the first non label columns will be used for the features with sentence1 being set to the first column and setence2 being set to the second if present. +- The files should be UTF-8 encoded. -If you're using a JSON lines (.jsonl) format, your file could look like this: +### Structure +#### JSON +```text +data +├── train.json +├── val.json +└── test.json ``` + +Where the contents of each JSON file would look like the following: +```text {"text": "I love this movie!", "label": "positive"} {"text": "This movie was awful.", "label": "negative"} {"text": "I have mixed feelings about this film.", "label": "neutral"} ``` -Each line is a separate JSON object, representing a single example. - -If you're using a CSV format, your file could look like this: - +#### CSV +```text +data +├── train.csv +├── val.csv +└── test.csv ``` + +Where the contents of each CSV file would look like the following: +```text text,label "I love this movie!","positive" "This movie was awful.","negative" "I have mixed feelings about this film.","neutral" ``` -The first row contains the column names, and each subsequent row represents a single example. +### Example -Whether you choose to use JSON lines or CSV will depend on your specific needs and preferences, but either format will work well with Hugging Face and Sparsify. -Make sure your data is formatted correctly according to these specifications to ensure it can be used in your experiments. -### One Shot -For one-shot experiments, Sparsify utilizes the `.npz` format for data storage, which is a file format based on the popular NumPy library. -This format is efficient and versatile. 
-In the near future, more functionality will be landed such that the definitions given above for Training Aware and Sparse Transfer will work as well. +## NPZ -#### Specifications +For One-Shot Experiments, Sparsify utilizes the `.npz` format for data storage, which is a file format based on the popular NumPy library. +In the future, more formats will be added for support with One-Shot Experiments. -- Each `.npz` file should contain a single data sample, with no batch dimension. This data sample will be run through the ONNX model. +### Specifications +- Each `.npz` file should contain a single data sample, with no batch dimension. + This data sample will be run through the ONNX model. - The `.npz` file should be structured as a dictionary, mapping the input name in the ONNX specification to a numpy array containing the data. -- All data samples should be stored under the same directory, typically named `data`. +- All data samples should be stored under the same directory, typically named `data`. -The local file structure should look like the following: +The root directory containing the data samples should be passed to the CLI as the `--data` argument. +### Structure ```text data - -- input1.npz - -- input2.npz - -- input3.npz +├── input1.npz +├── input2.npz +├── input3.npz ``` -#### Example - -For example, if you have a BERT-style model with a sequence length of 128, each `.npz` file should contain a dictionary mapping input names ("input_ids", "attention_mask", "token_type_ids") to numpy arrays of the appropriate size: - +Where each `input#.npz` file contains a single data sample, and the data sample is structured as a dictionary mapping the input name in the ONNX specification to a numpy array containing the data that matches the input shapes without the batch dimension. +For example, a BERT-style model running with a sequence length of 128 would have the following data sample: ```text { - "input_ids": ndarray(128,), - "attention_mask": ndarray(128,), - "token_type_ids": ndarray(128,) + "input_ids": ndarray(128,), + "attention_mask": ndarray(128,), + "token_type_ids": ndarray(128,) } ``` -The dictionary keys should match the names of the inputs in the ONNX model specification, and the shapes of the arrays should match the expected input shapes of the model. 
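+
+As a quick sketch of what one of these files holds (the file name, input names, and sequence length are assumptions for a BERT-style model), a single sample can be written and inspected with NumPy directly:
+
+```python
+import os
+
+import numpy as np
+
+os.makedirs("data", exist_ok=True)
+
+# write one sample (no batch dimension), keyed by the ONNX input names
+np.savez(
+    "data/input1.npz",
+    input_ids=np.zeros(128, dtype=np.int64),
+    attention_mask=np.ones(128, dtype=np.int64),
+    token_type_ids=np.zeros(128, dtype=np.int64),
+)
+
+# read it back and confirm the keys and shapes line up with the model's inputs
+sample = np.load("data/input1.npz")
+print({name: sample[name].shape for name in sample.files})
+```
+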
- -#### Generating NPZ Files +### Example Below is an example script for generating this file structure from a PyTorch module before the ONNX export: @@ -199,37 +273,37 @@ class NumpyExportWrapper(torch.nn.Module): self.model = model self.model.eval() # Set model to evaluation mode self.numpy_data = [] - + def forward(self, *args, **kwargs): with torch.no_grad(): inputs = {} batch_size = 0 - + for index, arg in enumerate(args): if isinstance(arg, Tensor): inputs[f"input_{index}"] = arg batch_size = arg.size[0] - + for key, val in kwargs.items(): if isinstance(val, Tensor): inputs[key] = val - batch_size = arg.size[0] - + batch_size = val.shape[0] + start_index = len(self.numpy_data) for _ in range(batch_size): self.numpy_data.append({}) - - for index, (input_key, input_batch) in enumerate(inputs): - for input_ in input_batch: - self.numpy_data[start_index + index][input_key] = input_ - + + for input_key in iter(inputs): + for idx, input in enumerate(inputs[input_key]): + self.numpy_data[start_index+idx][input_key] = input + return self.model(*args, **kwargs) def save(self, path: str = "data"): for index, item in enumerate(self.numpy_data): npz_file_path = f'{path}/input{str(index).zfill(4)}.npz' np.savez(npz_file_path, **item) - + print(f'Saved {len(self.numpy_data)} npz files to {path}') model = NumpyExportWrapper(YOUR_MODEL) @@ -238,9 +312,9 @@ for data in YOUR_DATA_LOADER: model.save() ``` -Note: Replace YOUR_MODEL and YOUR_DATA_LOADER with your PyTorch model and data loader, respectively. +Note: Replace `YOUR_MODEL` and `YOUR_DATA_LOADER` with your PyTorch model and data loader, respectively. -## Custom Use Cases +## Custom Currently, custom use cases are not supported for dataset representation and datasets must conform to the definitions above. In the near future, these will be supported through plugin specifications. diff --git a/docs/models-guide.md b/docs/models-guide.md index 93f5be86..6610ca4c 100644 --- a/docs/models-guide.md +++ b/docs/models-guide.md @@ -16,19 +16,80 @@ limitations under the License. # Sparsify Models Guide -For any Sparsify Experiments, a dense model can be supplied for sparsification. -One Shot is the only experiment type that requires a model to be passed in. -For others, a default model will be chosen to best fit the given use case. -Due to the varied ML pipelines and implementations, Sparsify standardizes on a few, popular formats for models. -You will need to make sure that your models are formatted properly according to the standards listed below. +For most Sparsify Experiments, you will need to provide a base model to create a sparse model from. +Due to the varied ML pipelines and implementations, Sparsify standardizes on a few popular formats for models. +You will need to make sure that your model is formatted properly according to the standards listed below. -## One Shot +## Table of Contents -The ONNX model format is the only currently supported one for one shot. -See the SparseML documentation for exporting to ONNX formats. -In the near future, more formats will be added for support with one shot. +1. [Image Classification](#image-classification) +2. [Object Detection](#object-detection) +3. [Image Segmentation](#image-segmentation) +4. [NLP](#nlp) +5. [ONNX](#onnx) +6. [Custom](#custom) -## Training Aware and Sparse Transfer +## Image Classification -The PyTorch model format is the only currently supported one for training aware and sparse transfer experiments. 
-The exact format will depend on the pipeline, and therefore the use case, for the experiment. +For image classification tasks, Sparsify relies on the PTH format generated from SparseML. +Specifically, the PTH format generated from the `ModuleExporter` class in SparseML. +This will save a model in the PTH format with the following structure: + +### Structure +```text +{ + "state_dict": model.state_dict(), + "optimizer": optimizer.state_dict(), + "recipe": recipe, + "epoch": epoch, + "arch_key": arch_key, +} +``` + +### Example +```python +from sparseml.pytorch.image_classification.utils import ModuleExporter +from torchvision.models import resnet18 + +model = resnet18() +exporter = ModuleExporter(model, "./") +exporter.export_pytorch( + optimizer=None, + epoch=-1, + recipe=None, + name=f"{model}.pth", + arch_key="resnet18", +) +``` + +## Object Detection + +For object detection tasks, Sparsify utilizes the YOLO format for models. +This is the same format used by Ultralytics [YOLOv5/YOLOv8](https://docs.ultralytics.com/) +This is the default format that is saved from training within the YOLOv5 or YOLOv8 repos. + +More information on the YOLO format can be found [here](https://docs.ultralytics.com/tasks/detect/#models). + +## Image Segmentation + +For image segmentation tasks, Sparsify utilizes the YOLO format for models. +This is the same format used by Ultralytics [YOLOv5/YOLOv8](https://docs.ultralytics.com/) +This is the default format that is saved from training within the YOLOv5 or YOLOv8 repos. + +More information on the YOLO format can be found [here](https://docs.ultralytics.com/tasks/segment/#models). + +## NLP + +For NLP tasks, Sparsify utilizes the HuggingFace Models format and expectations. +This includes the standard tokenizer.json, config.json, and bin files. +If using any of the standard transformers pathways externally or through SparseML, then this is the default format models are saved in. + +More information on the HuggingFace Models format can be found [here](https://huggingface.co/transformers/model_sharing.html). + +## ONNX + +For One-Shot Experiments, Sparsify utilizes the `.ONNX` format for models. +In the future, more formats will be added for support with One-Shot Experiments. + +For more information on the ONNX format, see the [ONNX website](https://onnx.ai/). +For more information on exporting to the ONNX format, see our docs page [here](https://docs.neuralmagic.com/user-guides/onnx-export). diff --git a/docs/one-shot-experiment-guide.md b/docs/one-shot-experiment-guide.md new file mode 100644 index 00000000..49b0159b --- /dev/null +++ b/docs/one-shot-experiment-guide.md @@ -0,0 +1,146 @@ + + +# Sparsify One-Shot Experiment Guide + +If you're just getting started with Sparsify, we recommend you try out this One-Shot Experiment pathway first. +We also have Sparse-Transfer and Training-Aware Experiments, which you can explore in the [Next Steps](#next-steps) section of this guide. + +## Table of Contents + +1. [Experiment Overview](#experiment-overview) +2. [CLI Quickstart](#cli-quickstart) +4. [Examples](#examples) +5. [Next Steps](#next-steps) +6. [Resources](#resources) + + +## Experiment Overview + +| Sparsity | Sparsification Speed | Accuracy | +|----------|----------------------|----------| +| **++** | **+++++** | **+++** | + +One-Shot Experiments are the quickest way to create a faster and smaller version of your model. 
+The algorithms are applied to the model post-training, utilizing a calibration dataset, so they result in no further training time and much faster sparsification times compared with Training-Aware Experiments. + +Generally, One-Shot Experiments result in a 3-5x speedup with minimal accuracy loss. +They are ideal for when you want to quickly sparsify your model and have limited time to spend on the sparsification process. + +The CLI Quickstart below will walk you through the steps to run a One-Shot Experiment on your model. +To utilize the cloud pathways for One-Shot Experiments, review the [Cloud User Guide](./cloud-user-guide.md). + +## CLI Quickstart + +Now that you understand what a One-Shot Experiment is and the benefits, including short optimization time due to post-training algorithms, you can now use the CLI to effectively run a One-Shot Experiment. + +Before you run a One-Shot Experiment, confirm you are logged into the Sparsify CLI. +For installation and setup instructions, review the [Install and Setup Section](../README.md#1-install-and-setup) in the Sparsify README. + +One-Shot Experiments use the following general command: + +```bash +sparsify.run one-shot --use-case USE_CASE --model MODEL --data DATA --optim-level OPTIM_LEVEL* +``` + +* optional arguments + +The description, rules, and possible values for each of the arguments are described below: +- [USE_CASE](#use_case) +- [MODEL](#model) +- [DATA](#data) +- [OPTIM_LEVEL](#optim_level) (Optional) + +### USE_CASE + +The generally supported use cases for Sparsify are: +- `cv-classification` +- `cv-detection` +- `cv-segmentation` +- `nlp-question_answering` +- `nlp-text_classification` +- `nlp-sentiment_analysis` +- `nlp-token_classification` +- `nlp-named_entity_recognition` + +Note that other aliases are recognized for these use cases, such as image-classification for cv-classification. +Sparsify will automatically recognize these aliases and apply the correct use case. + +For One-Shot Experiments, both the CLIs and APIs always support custom use cases. +To utilize, run a One-Shot Experiment with `--use-case` set to the desired custom use case. +This custom use case can be any ASCII string. + +### MODEL + +One-Shot requires the model provided to be in an [ONNX format](https://onnx.ai/). +The ONNX model must be exported with static input shapes and not contain custom ONNX operators. +For guidance on how to convert a PyTorch model to ONNX, read our [ONNX Export User Guide](https://docs.neuralmagic.com/user-guides/onnx-export). + +In the near future, more formats including PyTorch will be added for support with One-Shot Experiments. + +### DATA + +For One-Shot Experiments, Sparsify utilizes the `.npz` format for data storage, which is a file format based on the popular NumPy library. +In the future, more formats will be added for support with One-Shot Experiments. + +Specifically, the following structure is expected for the dataset: +```text +data +├── input1.npz +├── input2.npz +├── input3.npz +``` + +Where each `input#.npz` file contains a single data sample, and the data sample is structured as a dictionary mapping the input name in the ONNX specification to a numpy array containing the data that matches the input shapes without the batch dimension. 
+For example, a BERT-style model running with a sequence length of 128 would have the following data sample: +```text +{ + "input_ids": ndarray(128,), + "attention_mask": ndarray(128,), + "token_type_ids": ndarray(128,) +} +``` + +For more information on the specs and guides for creating the NPZ format, read the [NPZ Dataset Guide](./datasets-guide.md#npz). + +#### OPTIM_LEVEL + +When using Sparsify, the optim (sparsification) level is one of the top arguments you should decide on. +Specifically, it controls how much sparsification is applied to your model, with higher values resulting in faster and more compressed models. +At the max range, though, you may see a drop in accuracy. + +Given that One-Shot is applied in post-training, the sparsity ranges are lowered to avoid accuracy drops as compared with Sparse-Transfer or Training-Aware. +The current ranges are the following (subject to change): +- optim-level == 0.0: no sparsification is applied and the input model is returned as a baseline test case. +- optim-level < 0.3: INT8 quantization of the model (activations and weights) is applied. +- optim-level >= 0.3: unstructured pruning (sparsity) is applied to the weights of the model from 40% for 0.3 to 80% for 1.0 with linear scaling between. + Additionally, INT8 quantization of the model is applied. + +The default of 0.5 will result in a ~50% sparse model with INT8 quantization. + +## Examples + +Check back in soon for walkthroughs and examples of One-Shot Experiments applied to various popular models and use cases. + +### Next Steps + +Now that you have successfully run a One-Shot Experiment, check out the following guides to continue your Sparsify journey: +- [Sparse Transfer Experiment Guide](./sparse-transfer-experiment-guide.md) +- [Training Aware Experiment Guide](./training-aware-experiment-guide.md) + +### Resources + +To learn more about Sparsify and the available pathways other than One-Shot Experiments, refer to the [Sparsify README](../README.md). diff --git a/docs/optim-levels-guide.md b/docs/optim-levels-guide.md deleted file mode 100644 index 8e3e062c..00000000 --- a/docs/optim-levels-guide.md +++ /dev/null @@ -1,68 +0,0 @@ - - -# Sparsify Optim (Sparsification) Levels Guide - -When using Sparsify, the optim (sparsification) level is one of the top arguments you should decide on. -Specifically, it controls how much sparsification is applied to your model with higher values resulting in faster and more compressed models. -At the max range, though, you may see a drop in accuracy. -The optim level can be set anywhere from 0.0 to 1.0, where 0.0 is for no sparsification and 1.0 is for maximum sparsification. -0.5 is the default optim level and is a good starting point for most use cases. - -## Optim Level Values - -The general rule is that 0.0 is the baseline model, <0.3 only quantizes the model, and 0.3-1.0 increases the sparsity (unstructured/structured pruning) of the model and applies quantization. -The exact mappings of optim levels depends on the experiment type. -The current mappings for each experiment type are listed below. -Note, these mappings are subject to change in future releases as we continue to improve Sparsify with new algorithms and capabilities. - -### One-Shot Optim Levels - -Given that one shot is applied in post-training, the sparsity ranges are lowered to avoid accuracy drops as compared with sparse transfer or training aware. 
-The specific ranges are the following: - -- optim-level == 0.0: no sparsification is applied and the input model is returned as a baseline test case. -- optim-level < 0.3: INT8 quantization of the model (activations and weights) is applied. -- optim-level >= 0.3: unstructured pruning (sparsity) is applied to the weights of the model from 40% for 0.3 to 80% for 1.0 with linear scaling between. - Additionally, INT8 quantization of the model is applied. - -The default of 0.5 will result in a ~50% sparse model with INT8 quantization. - -### Sparse Transfer Optim Levels - -Sparse transfer mappings are a bit different from one shot and training aware since it maps to models available in the SparseZoo to transfer from. -Increasing the optim level will result in smaller and more compressed models. -The specific mappings are the following: - -- optim-level == 0.0: the largest model selected from the SparseZoo with no optimizations. -- optim-level < 0.25: the largest model selected from the SparseZoo with INT8 quantization applied to the model (activations and weights). -- optim-level < 0.5: the largest model selected form the SparseZoo with both unstructured pruning (sparsity) and INT8 quantization applied to the model. -- optim-level < 0.75: the medium model selected from the SparseZoo with both unstructured pruning (sparsity) and INT8 quantization applied to the model. -- optim-level <= 1.0: the smallest model selected from the SparseZoo with both unstructured pruning (sparsity) and INT8 quantization applied to the model. - -The default of 0.5 will result in a medium-sized sparse model with INT8 quantization. - -### Training-Aware Optim Levels - -Given that training aware is applied while training, the sparsity ranges are increased as compared to one shot since accuracy recovery is easier at higher sparsities. -The specific ranges are the following: - -- optim-level == 0.0: no sparsification is applied and the input model is returned as a baseline test case. -- optim-level < 0.3: INT8 quantization of the model (activations and weights) is applied. -- optim-level >= 0.3: unstructured pruning (sparsity) is applied to the weights of the model from 60% for 0.3 to 95% for 1.0 with linear scaling between. - Additionally, INT8 quantization of the model is applied. - -The default of 0.5 will result in a ~70% sparse model with INT8 quantization. diff --git a/docs/sparse-transfer-experiment-guide.md b/docs/sparse-transfer-experiment-guide.md new file mode 100644 index 00000000..ba108b19 --- /dev/null +++ b/docs/sparse-transfer-experiment-guide.md @@ -0,0 +1,159 @@ + + +# Sparsify Sparse-Transfer Experiment Guide + +The Sparsify Sparse-Transfer Experiment Guide is a guide for running Sparse-Transfer Experiments with the Sparsify CLI. +We also have One-Shot and Training-Aware Experiments, which you can explore in the [Next Steps](#next-steps) section of this guide. + +## Table of Contents + +1. [Experiment Overview](#experiment-overview) +2. [CLI Quickstart](#cli-quickstart) +4. [Examples](#examples) +5. [Next Steps](#next-steps) +6. [Resources](#resources) + +## Experiment Overview + +| Sparsity | Sparsification Speed | Accuracy | +|----------|----------------------|-----------| +| **++++** | **++++** | **+++++** | + +Sparse-Transfer Experiments are the second quickest way to create a faster and smaller model for your dataset. 
+Sparse, foundational models that have been pre-sparsified on a large, upstream dataset such as ImageNet are transferred to your dataset through fine-tuning keeping the sparse architecture intact. + +Generally, Sparse-Transfer Experiments result in a 5–10x speedup with minimal accuracy loss. +They are ideal when a sparse model already exists for your use case, and you want to quickly utilize it for your dataset. + +The CLI Quickstart below will walk you through the steps to run a Sparse-Transfer Experiment on your model. +To utilize the cloud pathways for Sparse-Transfer Experiments, review the [Cloud User Guide](./cloud-user-guide.md). + +## CLI Quickstart + +Now that you understand what a Sparse-Transfer Experiment is and the benefits, including fine-tuning a pre-optimized, sparse model on your data, you're ready to use the CLI to effectively run a Sparse-Transfer Experiment. + +Before you run a Sparse-Transfer Experiment, confirm you are logged into the Sparsify CLI. +For instructions on Installation and Setup, review the [Sparsify Install and Setup Section](READMEsection.com) in the Sparsify README. + +Sparse-Transfer Experiments use the following general command: + +```bash +sparsify.run sparse-transfer --use-case USE_CASE --data DATA --optim-level OPTIM_LEVEL* --model MODEL* +``` + +* optional arguments + +The description, rules, and possible values for each of the arguments are described below: +- [USE_CASE](#use_case) +- [DATA](#data) +- [OPTIM_LEVEL](#optim_level) (Optional) +- [MODEL](#model) (Optional) + +### USE_CASE + +The generally supported use cases for Sparsify are: +- `cv-classification` +- `cv-detection` +- `cv-segmentation` +- `nlp-question_answering` +- `nlp-text_classification` +- `nlp-sentiment_analysis` +- `nlp-token_classification` +- `nlp-named_entity_recognition` + +Note that other aliases are recognized for these use cases, such as image-classification for cv-classification. +Sparsify will automatically recognize these aliases and apply the correct use case. + +Currently, custom use cases are not supported for Sparse-Transfer Experiments. + +### DATA + +For all Sparsify Experiments, you will need to provide a dataset to create a sparse model. +Due to the varied ML pipelines and implementations, Sparsify standardizes on a few popular formats for datasets. +Confirm that your data is formatted properly according to the standards listed below. + +Different use cases may require different input formats depending on what is considered standard for that use case. 
+Specifically, the following are the supported formats as well as links to specs and guides for creating datasets for each format: +- `cv-classification`: Image Folder Format + - [Image Classification Dataset Guide](./datasets-guide#image-classification) + - Example structure: data/{SPLIT}/{CLASS}/{IMAGE.EXT}) +- `cv-detection` - YOLO Format + - [Object Detection Dataset Guide](./datasets-guide#object-detection) + - Example structure: data/classes.txt; data/images/{SPLIT}/{IMAGE.EXT}; data/labels/{SPLIT}/{IMAGE.EXT}) +- `cv-segmentation` - YOLO Format + - [Image Segmentation Dataset Guide](./datasets-guide#image-segmentation) + - Example structure: data/classes.txt; data/images/{SPLIT}/{IMAGE.EXT}; data/annotations/{SPLIT}/{IMAGE.EXT}) +- `nlp-*`: Hugging Face CSV or JSONW Format + - [NLP Dataset Guide](./datasets-guide#nlp) + - Example structure: data/{SPLIT}.csv or data/{SPLIT}.jsonl or data/{SPLIT}.json + +Currently, custom use cases are not supported for dataset representation and datasets must conform to the definitions above. +In the near future, these will be supported through plugin specifications. + +For full details on Sparsify datasets, read the [Sparsify Datasets Guide](./datasets-guide.md). + +#### OPTIM_LEVEL + +When using Sparsify, the optim (sparsification) level is one of the top arguments you should decide on. +Specifically, it controls how much sparsification is applied to your model with higher values resulting in faster and more compressed models. +At the max range, though, you may see a drop in accuracy. + +Sparse-Transfer optim_level mappings are unique since they map to models available in the SparseZoo to transfer from. +Increasing the optim level will result in smaller and more compressed models. +The specific mappings are the following: +- optim-level == 0.0: the largest model selected from the SparseZoo with no optimizations. +- optim-level < 0.25: the largest model selected from the SparseZoo with INT8 quantization applied to the model (activations and weights). +- optim-level < 0.5: the largest model selected from the SparseZoo with both unstructured pruning (sparsity) and INT8 quantization applied to the model. +- optim-level < 0.75: the medium model selected from the SparseZoo with both unstructured pruning (sparsity) and INT8 quantization applied to the model. +- optim-level <= 1.0: the smallest model selected from the SparseZoo with both unstructured pruning (sparsity) and INT8 quantization applied to the model. + +The default of 0.5 will result in a medium-sized sparse model with INT8 quantization, and is a good default to start with. + +#### MODEL + +Models are optional for the Sparse-Transfer pathway. +If no model is provided, the best pre-sparsified model and recipe from the SparseZoo for the given optimization level will be used. 
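+
+For example, a hypothetical invocation that relies on this default model selection (the use case, data path, and optim level below are placeholder values) could look like:
+
+```bash
+sparsify.run sparse-transfer --use-case cv-classification --data ./data --optim-level 0.5
+```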
+ +If you choose to override the model, it is expected to be a pre-sparsified model and adhere to the following formats depending on the use case: +- `cv-classification`: SparseML PTH Format + - [Image Classification Models Guide](./models-guide#image-classification) +- `cv-detection` - YOLOv5/YOLOv8 Format + - [Object Detection Models Guide](./models-guide#object-detection) + - Example structure: data/classes.txt; data/images/{SPLIT}/{IMAGE.EXT}; data/labels/{SPLIT}/{IMAGE.EXT}) +- `cv-segmentation` - YOLOv5/YOLOv8 Format + - [Image Segmentation Models Guide](./models-guide#image-segmentation) +- `nlp-*`: Hugging Face Format + - [NLP Models Guide](./models-guide#nlp) + +Currently, custom use cases are not supported for model representation and models must conform to the definitions above. +In the near future, these will be supported through plugin specifications. + +For full details on Sparsify models, read the [Sparsify Models Guide](./models-guide.md). + +## Examples + +Check back in soon for walkthroughs and examples of One-Shot Experiments applied to various popular models and use cases. + +### Next Steps + +Now that you have successfully run a Sparse-Transfer Experiment, check out the following guides to continue your Sparsify journey: +- [Training Aware Experiment Guide](./training-aware-experiment-guide.md) +- [One-Shot Experiment Guide](./one-shot-experiment-guide.md) + +### Resources + +To learn more about Sparsify and the available pathways other than Sparse-Transfer Experiments, refer to the [Sparsify README](../README.md). diff --git a/docs/training-aware-experiment-guide.md b/docs/training-aware-experiment-guide.md new file mode 100644 index 00000000..92f2f40f --- /dev/null +++ b/docs/training-aware-experiment-guide.md @@ -0,0 +1,158 @@ + + +# Sparsify Training-Aware Experiment Guide + +The Sparsify Training-Aware Experiment Guide is a guide for running Training-Aware Experiments with the Sparsify CLI. +We also have One-Shot and Sparse-Transfer Experiments, which you can explore in the [Next Steps](#next-steps) section of this guide. + +## Table of Contents + +1. [Experiment Overview](#experiment-overview) +2. [CLI Quickstart](#cli-quickstart) +4. [Examples](#examples) +5. [Next Steps](#next-steps) +6. [Resources](#resources) + +## Experiment Overview + +| Sparsity | Sparsification Speed | Accuracy | +|-----------|-----------------------|-----------| +| **+++++** | **++** | **+++++** | + +Training-Aware Experiments are the most accurate way to create a faster and smaller model for your dataset. +The algorithms are applied to the model during training, so they offer the best possible recovery of accuracy. +However, they do require additional training time and hyperparameter tuning to achieve the best results. + +Generally, Training-Aware Experiments result in a 6–12x speedup with minimal accuracy loss. +They are ideal when you have the time to train a model, have a custom model, or want to achieve the best possible accuracy. + +The CLI Quickstart below will walk you through the steps to run a Training-Aware Experiment on your model. +To utilize the cloud pathways for Training-Aware Experiments, review the [Cloud User Guide](./cloud-user-guide.md). + +## CLI Quickstart + +Now that you understand what a Training-Aware Experiment is and the benefits, including the best possible recovery of accuracy for an optimized model, you're ready to use the CLI to effectively run a Training-Aware Experiment. 
+ +Before you run a Training-Aware Experiment, confirm you are logged in to the Sparsify CLI. +For instructions on Installation and Setup, review the [Sparsify Install and Setup Section](READMEsection.com) in the Sparsify README. + +Training-Aware Experiments use the following general command: + +```bash +sparsify.run training-aware --use-case USE_CASE --model MODEL --data DATA --optim-level OPTIM_LEVEL* +``` + +* optional arguments + +The values for each of the arguments follow these general rules: +- [USE_CASE](#use_case) +- [MODEL](#model) +- [DATA](#data) +- [OPTIM_LEVEL](#optim_level) (Optional) + +### USE_CASE + +The generally supported use cases for Sparsify are: +- `cv-classification` +- `cv-detection` +- `cv-segmentation` +- `nlp-question_answering` +- `nlp-text_classification` +- `nlp-sentiment_analysis` +- `nlp-token_classification` +- `nlp-named_entity_recognition` + +Note that other aliases are recognized for these use cases, such as image-classification for cv-classification. +Sparsify will automatically recognize these aliases and apply the correct use case. + +Currently, custom use cases are not supported for Training-Aware Experiments. + +#### MODEL + +Models are optional for the Sparse-Transfer pathway. +If no model is provided, a performance and accuracy balanced base model for the use case will be chosen. + +If you choose to override the model, it is expected to be a pre-sparsified model and adhere to the following formats depending on the use case: +- `cv-classification`: SparseML PTH Format + - [Image Classification Models Guide](./models-guide#image-classification) +- `cv-detection` - YOLOv5/YOLOv8 Format + - [Object Detection Models Guide](./models-guide#object-detection) + - Example structure: data/classes.txt; data/images/{SPLIT}/{IMAGE.EXT}; data/labels/{SPLIT}/{IMAGE.EXT}) +- `cv-segmentation` - YOLOv5/YOLOv8 Format + - [Image Segmentation Models Guide](./models-guide#image-segmentation) +- `nlp-*`: Hugging Face Format + - [NLP Models Guide](./models-guide#nlp) + +Currently, custom use cases are not supported for model representation and models must conform to the definitions above. +In the near future, these will be supported through plugin specifications. + +For full details on Sparsify models, read the [Sparsify Models Guide](./models-guide.md). + +#### DATA + +For all Sparsify Experiments, you will need to provide a dataset to create a sparse model. +Due to the varied ML pipelines and implementations, Sparsify standardizes on a few popular formats for datasets. +Confirm that your data is formatted properly according to the standards listed below. + +Different use cases may require different input formats depending on what is considered standard for that use case. 
+Specifically, the following are the supported formats as well as links to specs and guides for creating datasets for each format: +- `cv-classification`: Image Folder Format + - [Image Classification Dataset Guide](./datasets-guide#image-classification) + - Example structure: data/{SPLIT}/{CLASS}/{IMAGE.EXT}) +- `cv-detection` - YOLO Format + - [Object Detection Dataset Guide](./datasets-guide#object-detection) + - Example structure: data/classes.txt; data/images/{SPLIT}/{IMAGE.EXT}; data/labels/{SPLIT}/{IMAGE.EXT}) +- `cv-segmentation` - YOLO Format + - [Image Segmentation Dataset Guide](./datasets-guide#image-segmentation) + - Example structure: data/classes.txt; data/images/{SPLIT}/{IMAGE.EXT}; data/annotations/{SPLIT}/{IMAGE.EXT}) +- `nlp-*`: Hugging Face CSV or JSONW Format + - [NLP Dataset Guide](./datasets-guide#nlp) + - Example structure: data/{SPLIT}.csv or data/{SPLIT}.jsonl or data/{SPLIT}.json + +Currently, custom use cases are not supported for dataset representation and datasets must conform to the definitions above. +In the near future, these will be supported through plugin specifications. + +For full details on Sparsify datasets, read the [Sparsify Datasets Guide](./datasets-guide.md). + +#### OPTIM_LEVEL + +When using Sparsify, the optim (sparsification) level is one of the top arguments you should decide on. +Specifically, it controls how much sparsification is applied to your model with higher values resulting in faster and more compressed models. +At the max range, though, you may see a drop in accuracy. + +Given that Training-Aware is applied while training, the sparsity ranges are increased as compared to one shot since accuracy recovery is easier at higher sparsities. +The specific ranges are the following: +- optim-level == 0.0: no sparsification is applied and the input model is returned as a baseline test case. +- optim-level < 0.3: INT8 quantization of the model (activations and weights) is applied. +- optim-level >= 0.3: unstructured pruning (sparsity) is applied to the weights of the model from 60% for 0.3 to 95% for 1.0 with linear scaling between. + Additionally, INT8 quantization of the model is applied. + +The default of 0.5 will result in a ~70% sparse model with INT8 quantization, and is a good default to start with. + +## Examples + +Check back in soon for walkthroughs and examples of One-Shot Experiments applied to various popular models and use cases. + +### Next Steps + +Now that you have successfully run a Training-Aware Experiment, check out the following guides to continue your Sparsify journey: +- [One-Shot Experiment Guide](./one-shot-experiment-guide.md) +- [Sparse Transfer Experiment Guide](./sparse-transfer-experiment-guide.md) + +### Resources + +To learn more about Sparsify and the available pathways other than Training-Aware Experiments, refer to the [Sparsify README](../README.md). diff --git a/docs/use-cases-guide.md b/docs/use-cases-guide.md deleted file mode 100644 index 51358856..00000000 --- a/docs/use-cases-guide.md +++ /dev/null @@ -1,57 +0,0 @@ - - -# Sparsify Use Cases Guide - -To use Sparsify, you must specify a use case for all experiments run. -A use case is the specific task or domain/sub-domain you wish to sparsify a model for such as image classification, object detection, or text classification. 
-It is used to enable Sparsify to apply the best sparsification techniques for your use case, to automatically package the model for deployment, and depending on what is run, to load specific pipelines for data loading and training. - -## Use Cases - -The generally supported use cases for Sparsify currently are: -- CV - classification: `cv-classification` -- CV - detection: `cv-detection` -- CV - segmentation: `cv-segmentation` -- NLP - question answering: `nlp-question_answering` -- NLP - text classification: `nlp-text_classification` -- NLP - sentiment analysis: `nlp-sentiment_analysis` -- NLP - token classification: `nlp-token_classification` -- NLP - named entity recognition: `nlp-named_entity_recognition` - -Note, other aliases are recognized for these use cases such as image-classification for cv-classification. -Sparsify will automatically recognize these aliases and apply the correct use case. - -### Custom Use Cases - -If you wish to use Sparsify for a use case that is not in the list of currently supported use cases, you can use a custom use case for some pathways in Sparsify. -The custom use cases will be saved into the Sparsify cloud for future reuse when run through a supported pathway. -The pathways that support custom use cases are listed below. - -Note, custom use cases will prevent Sparsify from applying known, domain specific knowledge for sparsification of your model. -Additionally, it will prevent autofill of the pre and post processing functions when creating a deployment package. - -#### One Shot - -For One Shot experiments, both the CLIs and APIs always support custom use cases. -To utilize, run a one shot experiment with `--use-case` set to the desired custom use case. - -### Training Aware - -For Training Aware experiments, custom use cases are only supported with the APIs for custom integrations. -This is because non-custom integrations utilize plugins that corresponding to the appropriate use case for training pipelines. -To utilize this, ensure that you have a training pipeline ready to go and inject the Sparsify API into the training pipeline with the desired use case passed in as an argument. -More info on this specific pathway will be available in the near future as Sparsify development progresses. 
diff --git a/setup.py b/setup.py index 4bb6efc2..3c38dfdb 100644 --- a/setup.py +++ b/setup.py @@ -26,11 +26,10 @@ # load and overwrite version and release info from sparseml package exec(open(os.path.join("src", "sparsify", "version.py")).read()) print(f"loaded version {version} from src/sparsify/version.py") -version_nm_deps = f"{version_major_minor}.0" +version_nm_deps = f"{version_major_minor}.0.202308" _PACKAGE_NAME = "sparsify" if is_release else "sparsify-nightly" - _deps = [ "pydantic>=1.8.2,<2.0.0", "pyyaml>=5.0.0", @@ -39,14 +38,14 @@ "setuptools>=56.0.0", "optuna>=3.0.2", "onnxruntime-gpu", - "protobuf<=3.20.1,>=3.12.2", -] -_nm_deps = [ f"{'sparsezoo' if is_release else 'sparsezoo-nightly'}~={version_nm_deps}", - f"{'sparseml' if is_release else 'sparseml-nightly'}[torchvision,transformers,yolov5]~={version_nm_deps}", # noqa E501 f"{'deepsparse' if is_release else 'deepsparse-nightly'}~={version_nm_deps}", + f"{'sparseml' if is_release else 'sparseml-nightly'}[torchvision,yolov5]~={version_nm_deps}", # noqa E501 ] +_nm_deps = [ + f"{'sparseml' if is_release else 'sparseml-nightly'}[transformers]~={version_nm_deps}", # noqa E501 +] _dev_deps = [ "black>=20.8b1", @@ -57,6 +56,11 @@ "fastai>=2.7.7", ] +_llm_deps = [ + "llm-foundry==0.2.0", + f"{'nm-transformers' if is_release else 'nm-transformers-nightly'}", +] + def _setup_packages() -> List: return find_packages( @@ -69,11 +73,11 @@ def _setup_package_dir() -> Dict: def _setup_install_requires() -> List: - return _nm_deps + _deps + return _deps def _setup_extras() -> Dict: - return {"dev": _dev_deps} + return {"dev": _dev_deps, "_nm_deps": _nm_deps, "llm": _llm_deps} def _setup_entry_points() -> Dict: @@ -81,6 +85,8 @@ def _setup_entry_points() -> Dict: "console_scripts": [ "sparsify.run=sparsify.cli.run:main", "sparsify.login=sparsify.login:main", + "sparsify.check_environment=sparsify.check_environment.main:main", + "finetune=sparsify.auto.tasks.finetune.finetune:parse_args_and_run", ] } @@ -114,12 +120,12 @@ def _setup_long_description() -> Tuple[str, str]: install_requires=_setup_install_requires(), extras_require=_setup_extras(), entry_points=_setup_entry_points(), - python_requires=">=3.7.0", + python_requires=">=3.8.0", classifiers=[ "Development Status :: 5 - Production/Stable", "Programming Language :: Python :: 3", "Programming Language :: Python :: 3 :: Only", - "Programming Language :: Python :: 3.7", + "Programming Language :: Python :: 3.8", "Programming Language :: Python :: 3.8", "Programming Language :: Python :: 3.9", "Intended Audience :: Developers", diff --git a/src/sparsify/auto/samples/finetune_llmfoundry_sample.yaml b/src/sparsify/auto/samples/finetune_llmfoundry_sample.yaml new file mode 100644 index 00000000..6b2f0c1d --- /dev/null +++ b/src/sparsify/auto/samples/finetune_llmfoundry_sample.yaml @@ -0,0 +1,134 @@ +max_seq_len: 2048 +global_seed: 17 +model_name_or_path: mosaicml/mpt-7b-instruct +load_path: /storage/dsikka/mpt_7b_instruct_oneshot_sp70.pt +precision: amp_bf16 + +max_duration: 1ep +eval_interval: 1ep +# eval_subset_num_batches: 3 # use this for quick testing +eval_first: true +seed: ${global_seed} + +global_train_batch_size: 1 +# for mpt-7b dense: +# 4 x A100_80GB = "device_train_microbatch_size: 12" +# 8 x A6000_48GB = "device_train_microbatch_size: 6" + +# for mpt-7b sparse (with masks): +# 8 x A6000_48GB = "device_train_microbatch_size: 4" +device_train_batch_size: 1 +device_train_microbatch_size: 1 +device_eval_batch_size: 1 + +# Run Name +run_name: test_run + +model: + name: 
hf_causal_lm + pretrained: true + pretrained_model_name_or_path: mosaicml/mpt-7b-instruct + max_seq_len: ${max_seq_len} + config_overrides: + attn_config: + attn_impl: torch + # Set this to `true` if using `train_loader.dataset.packing_ratio` below + attn_uses_sequence_id: true + +# Tokenizer +tokenizer: + name: EleutherAI/gpt-neox-20b + kwargs: + model_max_length: ${max_seq_len} + +# Dataloaders +train_loader: + name: finetuning + dataset: + hf_name: mosaicml/dolly_hhrlhf + split: train + max_seq_len: ${max_seq_len} + allow_pad_trimming: false + decoder_only_format: true + # # Use `python llmfoundry/data/packing.py --yaml-path /path/to/this/yaml/ ...` + # # to profile this run's optimal packing_ratio as it depends on GPU count, + # # batch size, sequence length + packing_ratio: 13 # padding=0.36%, waste=0.79% + shuffle: true + drop_last: false + num_workers: 8 + pin_memory: false + prefetch_factor: 2 + persistent_workers: true + timeout: 0 + +eval_loader: + name: finetuning + dataset: + hf_name: mosaicml/dolly_hhrlhf + split: test + max_seq_len: ${max_seq_len} + allow_pad_trimming: false + decoder_only_format: true + packing_ratio: 13 + shuffle: false + drop_last: false + num_workers: 8 + pin_memory: false + prefetch_factor: 2 + persistent_workers: true + timeout: 0 + +# Optimization +scheduler: + name: linear_decay_with_warmup + t_warmup: 20ba + alpha_f: 0 + +optimizer: + name: decoupled_adamw + lr: 1e-4 + betas: + - 0.9 + - 0.999 + eps: 1.0e-8 + weight_decay: 0.0 + +# we can't use gradient clipping for sparse training runs because we don't have +# a way to mask gradients of pruned weights, and thus the global gradient norm +# will be incorrect +# algorithms: +# gradient_clipping: +# clipping_type: norm +# clipping_threshold: 1.0 + +# FSDP +fsdp_config: + sharding_strategy: FULL_SHARD + mixed_precision: FULL + activation_checkpointing: true + activation_checkpointing_reentrant: false + activation_cpu_offload: false + limit_all_gathers: true + verbose: false + +# Logging +progress_bar: false +log_to_console: true +console_log_interval: 1ba + +callbacks: + speed_monitor: + window_size: 10 + lr_monitor: {} + memory_monitor: {} + runtime_estimator: {} + +loggers: + tensorboard: {} + +# Checkpoint to local filesystem or remote object store +save_interval: 1ep +save_num_checkpoints_to_keep: 1 # Important, this cleans up checkpoints saved to DISK +save_folder: output_dir/{run_name}/checkpoints +save_overwrite: true \ No newline at end of file diff --git a/src/sparsify/auto/scripts/main.py b/src/sparsify/auto/scripts/main.py index ea46270d..365c8e7b 100644 --- a/src/sparsify/auto/scripts/main.py +++ b/src/sparsify/auto/scripts/main.py @@ -25,7 +25,9 @@ ) from sparsify.schemas import APIArgs from sparsify.schemas.auto_api import SparsificationTrainingConfig +from sparsify.utils import get_task_info from tensorboard.program import TensorBoard +from tensorboard.util import tb_logging _LOGGER = logging.getLogger("auto_banner") @@ -41,6 +43,20 @@ def main(api_args: APIArgs): deploy_directory, ) = create_save_directory(api_args) + if api_args.task in get_task_info("finetune").aliases: + _LOGGER.info( + "Running finetuning. 
" + "Currently only arguments passed for use-case and data will be considered" + ) + config = SparsificationTrainingConfig( + task=api_args.task, dataset=api_args.dataset, base_model=None, recipe=None + ) + runner = TaskRunner.create(config) + runner.train(train_directory=train_directory, log_directory=log_directory) + return + + _suppress_tensorboard_logs() + # Launch tensorboard server tensorboard_server = TensorBoard() tensorboard_server.configure(argv=[None, "--logdir", log_directory]) @@ -48,17 +64,25 @@ def main(api_args: APIArgs): _LOGGER.info(f"TensorBoard listening on {url}") # Request config from api and instantiate runner + raw_config = api_request_config(api_args) config = SparsificationTrainingConfig(**raw_config) - runner = TaskRunner.create(config) + runner = TaskRunner.create(config) # Execute integration run and return metrics metrics = runner.train(train_directory=train_directory, log_directory=log_directory) + yaml.safe_dump( metrics.dict(), (Path(train_directory).parent / "metrics.yaml").open("w") ) - runner.export(model_directory=train_directory) runner.create_deployment_directory( train_directory=train_directory, deploy_directory=deploy_directory ) + + +def _suppress_tensorboard_logs(): + # set tensorboard logger to warning level + # avoids a constant stream of logs from tensorboard + tb_logger = tb_logging.get_logger() + tb_logger.setLevel(logging.WARNING) diff --git a/src/sparsify/auto/tasks/deployment_instructions.md b/src/sparsify/auto/tasks/deployment_instructions.md new file mode 100644 index 00000000..be432b82 --- /dev/null +++ b/src/sparsify/auto/tasks/deployment_instructions.md @@ -0,0 +1,84 @@ +# Sparsify Deployment Guide +​ +Deploying with Neural Magic's inference runtime, [DeepSparse](https://github.com/neuralmagic/deepsparse), is recommended for the best performance with sparsified models on CPUs. +The deployment folder contains everything necessary to benchmark and deploy a sparsified model with DeepSparse. +​ +## Requirements +​ +A Linux-based CPU system with Python versions 3.8-3.10 installed and AVX2 or greater instruction set is required to run DeepSparse. +DeepSparse is not currently supported on Windows or MacOS. +To install DeepSparse, its dependencies, and check your system, run the following commands: +​ +```bash +pip install deepsparse[server] +deepsparse.check_hardware +``` +​ +Other installation options may be needed, depending on your use case. +For more details and other installation options, see the [Installation Guide](https://github.com/neuralmagic/deepsparse). + +For the latest hardware support and system requirements, see the [Support and Requirements Guide](https://github.com/neuralmagic/deepsparse). +​ +## Benchmarking +​ +The `deepsparse.benchmark` command enables benchmarking of an ONNX model on your system. +The command takes a model path as a minimum argument and will run the model through a series of inference runs using random data. +For example: +​ +```bash +deepsparse.benchmark model.onnx +``` +​ +For more information on the `deepsparse.benchmark` command, see the [Benchmarking Guide](https://github.com/neuralmagic/deepsparse/blob/main/docs/user-guide/deepsparse-benchmarking.md). +​ +## Pipeline Deployments +​ +DeepSparse contains many pipeline deployments for different use cases. +These pipelines package up the model inference and any pre- and post-processing steps into a single, optimized callable for deployment. +Additionally, custom pipelines are supported. 
+For example, a sample custom pipeline for ImageNet is provided below: +​ +```python +from deepsparse.pipelines.custom_pipeline import CustomTaskPipeline +from torchvision import transforms +from PIL import Image +import torch +​ +preprocess_transforms = transforms.Compose([ + transforms.Resize(256), + transforms.CenterCrop(224), + transforms.ToTensor(), + transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]), +]) +​ +def preprocess(img_file): + with open(img_file, "rb") as img_file: + img = Image.open(img_file) + img = img.convert("RGB") + img = preprocess_transforms(img) + batch = torch.stack([img]) + return [batch.numpy()] +​ +custom_pipeline = CustomTaskPipeline( + model_path="zoo:cv/classification/resnet_v1-50/pytorch/sparseml/imagenet/pruned90_quant-none", + process_inputs_fn=preprocess, +) +​ +scores, probs = custom_pipeline("buddy.jpeg") +``` +(Note: Download [buddy.jpeg](https://raw.githubusercontent.com/neuralmagic/deepsparse/main/tests/deepsparse/pipelines/sample_images/buddy.jpeg)) + +​ +For more information on the available pipelines and how to create custom pipelines, see the [Pipeline Deployment Guide](https://github.com/neuralmagic/deepsparse/blob/main/docs/user-guide/deepsparse-benchmarking.md). +​ +## Server Deployments +​ +DeepSparse additionally contains a performant server deployment for different use cases. +The server deployment packages up the model inference and any pre- and post-processing steps into a single, optimized HTTP request for deployment. +To start the server, run the following command with the appropriate arguments: +​ +```bash +deepsparse.server --task TASK --model_path ./deployment/model.onnx +``` +​ +For more information on the `deepsparse.server` command, see the [Server Deployment Guide](https://github.com/neuralmagic/deepsparse/blob/main/docs/user-guide/deepsparse-server.md). diff --git a/src/sparsify/auto/tasks/finetune/__init__.py b/src/sparsify/auto/tasks/finetune/__init__.py new file mode 100644 index 00000000..c3b6bcb6 --- /dev/null +++ b/src/sparsify/auto/tasks/finetune/__init__.py @@ -0,0 +1,26 @@ +# Copyright (c) 2021 - present / Neuralmagic, Inc. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# flake8: noqa + +from .args import * + + +try: + from .finetune import * + from .runner import * +except ImportError as exception: + raise ImportError( + "To use the llm finetuning pathway, please install sparsify[llm]" + ) from exception diff --git a/src/sparsify/auto/tasks/finetune/args.py b/src/sparsify/auto/tasks/finetune/args.py new file mode 100644 index 00000000..7e8e3389 --- /dev/null +++ b/src/sparsify/auto/tasks/finetune/args.py @@ -0,0 +1,34 @@ +# Copyright (c) 2021 - present / Neuralmagic, Inc. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from pydantic import Field +from sparsify.auto.tasks import BaseArgs + + +__all__ = ["FineTuneTrainArgs"] + + +class FineTuneTrainArgs(BaseArgs): + yaml: str = Field( + default=None, + description="path to the training yaml", + ) + checkpoints: str = Field( + default=None, + description="path to the directory to store checkpoints", + ) + logging: str = Field( + default=None, + description="path to store logs", + ) diff --git a/src/sparsify/auto/tasks/finetune/finetune.py b/src/sparsify/auto/tasks/finetune/finetune.py new file mode 100644 index 00000000..ce113d81 --- /dev/null +++ b/src/sparsify/auto/tasks/finetune/finetune.py @@ -0,0 +1,372 @@ +# Copyright (c) 2021 - present / Neuralmagic, Inc. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import logging +import os +from enum import Enum +from pathlib import Path +from typing import Dict, Tuple, Union + +import torch +from torch.utils.data import DataLoader + +import click +from composer import Trainer +from composer.core import Evaluator +from composer.models import HuggingFaceModel +from composer.utils import dist, get_device, reproducibility +from llmfoundry import ( + COMPOSER_MODEL_REGISTRY, + build_finetuning_dataloader, + build_text_denoising_dataloader, +) +from llmfoundry.data.text_data import build_text_dataloader +from llmfoundry.utils.builders import ( + build_logger, + build_optimizer, + build_scheduler, + build_tokenizer, +) +from llmfoundry.utils.config_utils import update_batch_size_info +from omegaconf import DictConfig +from omegaconf import OmegaConf as om +from sparsify.auto.tasks.finetune.helpers import MaskPrunedWeights, attach_masks +from transformers import PreTrainedTokenizerBase + + +__all__ = ["FineTuner"] + +TEXT_DENOISING_MODELS = ["hf_prefix_lm", "hf_t5"] +TEXT_MODELS = ["hf_causal_lm"] + +_LOGGER = logging.getLogger(__name__) +_LOGGER.setLevel(logging.INFO) + + +class LLMDataTypes(Enum): + TEXT = "text" + TEXT_DENOISING = "text_denoising" + FINETUNING = "finetuning" + + +class FineTuner: + + """ + LLMFinetuner which allows finetuning of LLM Models using llmfoundry. Finetuning is + heavily dependent on providing a llmfoundary-compliant yaml file which sets up + the training, including which pretrained model to pull as well as the data that is + to be used for finetuning. 
Please see the example yaml under samples or the + llmfoundry repo for additional examples: + https://github.com/mosaicml/llm-foundry/blob/main/scripts/train/finetune_example/ + """ + + def __init__( + self, + dataset_path: Union[str, Path], + train_directory: Union[str, Path], + log_dir: Union[str, Path], + ) -> None: + """ + :param dataset_path: path to the llmfoundry compliant yaml file + :param train_directory: path to log the checkpoints for the model + :param log_dir: path to store the specified logger (such as tensorboard) + + """ + if os.path.exists(dataset_path): + if Path(dataset_path).suffix not in [".yaml", ".yml"]: + raise RuntimeError( + "LLMFinetuner expects a yaml file compliant with llmfoundry." + ) + with open(dataset_path) as yaml_file: + self._train_config = om.load(yaml_file) + else: + raise FileNotFoundError( + f"{dataset_path} does not exist. Plase ensure " + " the yaml file exists and the path provided is correct." + ) + + if self._train_config.get("loggers"): + for _, log_config in self._train_config["loggers"].items(): + if "log_dir" in log_config: + log_config["log_dir"] = os.path.join(log_dir, log_config["log_dir"]) + else: + log_config["log_dir"] = log_dir + + self._train_config.save_folder = os.path.join( + train_directory, Path(self._train_config.save_folder) + ) + self._model_name = self._train_config["model"]["name"] + self._validate_yaml() + + @property + def model_name(self) -> str: + """ + :return: model name for the LLM + """ + return self._model_name + + def _validate_yaml(self): + """ + Validate that the provided yaml is compatible with llmfoundry. + """ + if not self._train_config.get("train_loader"): + raise ValueError( + "the provided config file is missing details on the train_loader" + ) + + data_loaders = [self._train_config.get("train_loader")] + if self._train_config.get("eval_loader"): + data_loaders.append(self._train_config.get("eval_loader")) + + for loader in data_loaders: + if loader["name"] == LLMDataTypes.TEXT.value: + if self.model_name in TEXT_DENOISING_MODELS: + raise ValueError( + f"Model type {self.model_name} is not supported " + " for text dataloaders. Please use the " + " text_denoising dataloader." + ) + elif loader["name"] == LLMDataTypes.TEXT_DENOISING.value: + if self.model_name in TEXT_MODELS: + raise ValueError( + f"Model type {self.model_name} is not supported " + " for text_denoising dataloaders. Please use the " + " text dataloader." + ) + + def _build_model(self, tokenizer: PreTrainedTokenizerBase) -> HuggingFaceModel: + """ + Based on the model name, pull and return the pretrained hugging face model. + + :param tokenizer: transformers tokenizer + :return: HuggingFaceModel from the mosaicml composer library + """ + if self.model_name not in COMPOSER_MODEL_REGISTRY: + raise ValueError( + "Please ensure the model name provided is one of " + f" {list(COMPOSER_MODEL_REGISTRY.keys())}" + ) + return COMPOSER_MODEL_REGISTRY[self.model_name]( + self._train_config.model, tokenizer + ) + + def _load_weights_and_attach_masks( + self, tokenizer: PreTrainedTokenizerBase + ) -> Tuple[torch.nn.Module, Union[None, "MaskPrunedWeights"]]: + """ + If a load_path is provided, attempt to load in weights from the specified + location. Because the mask may be sparse, attach masks, masking where the + weights have already been pruned. + + :return: tuple including the model with weights loaded from the `load_path` + and with buffers attached for pruning masks. Also returns the MaskPrunedWeights + algorithm. 
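+        If loading the weights fails, the pretrained model is returned instead and
+        None is returned in place of the masking algorithm.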
+ """ + model = self._build_model(tokenizer) + try: + model.load_state_dict( + torch.load(self._train_config.get("load_path"), map_location="cpu")[ + "state" + ]["model"], + strict=True, + ) + except Exception as e: + _LOGGER.error(f" Failed to load weights. Returning pretrained model {e}") + if self._train_config.model.pretrained is False: + self._train_config.model.pretrained = True + model = self._build_model(tokenizer) + return model, None + + attach_masks(model) + return model, MaskPrunedWeights() + + def _build_dataloaders( + self, + dataloader_config: DictConfig, + tokenizer: PreTrainedTokenizerBase, + device_batch_size: int, + ) -> DataLoader: + """ + Build a torch dataloader given a DictConfig containing details about the + dataloader, the tokenizer that is to be applied to the data, and the batch size + for the dataloader. + + :param dataloader_config DictConfig from the omegaconf library, containing + details on the dataloader + :param tokenizer: transformers tokenizer + :param device_batch_size: batch size for the dataloader + :return: a torch DataLoader + """ + if dataloader_config.name == LLMDataTypes.TEXT.value: + return build_text_dataloader( + dataloader_config, + tokenizer, + device_batch_size, + ) + elif dataloader_config.name == LLMDataTypes.TEXT_DENOISING.value: + return build_text_denoising_dataloader( + dataloader_config, + tokenizer, + device_batch_size, + ) + elif dataloader_config.name == LLMDataTypes.FINETUNING.value: + return build_finetuning_dataloader( + dataloader_config, + tokenizer, + device_batch_size, + ) + + def _get_fsdp_config(self) -> Union[Dict, None]: + """ + Fetch the fsdp configuration. If <= one gpu devices are available, fsdp is + turned off. + + :return: fsdp dictionary if number of cuda devices available is > one, else None + """ + fsdp_config = self._train_config.get("fsdp_config", None) + fsdp_config = ( + om.to_container(fsdp_config, resolve=True) if fsdp_config else None + ) + + if dist.get_world_size() <= 1: + fsdp_config = None + + return fsdp_config + + def _build_trainer(self) -> Trainer: + """ + Build the trainer object. This involves loading the pretrained model, fetching + the tokenizer, and setting up the dataloaders, optimizer, and scheduler. 
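+        When a `load_path` checkpoint is provided and loads successfully, pruning masks
+        are attached and the MaskPrunedWeights algorithm is added so that already-pruned
+        weights remain zero during finetuning.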
+ + :return: mosaicml composer Trainer object + """ + reproducibility.seed_all(self._train_config.seed) + if dist.get_world_size() > 1: + dist.initialize_dist(get_device(None)) + + self._train_config = update_batch_size_info(self._train_config) + + tokenizer = build_tokenizer(self._train_config.tokenizer) + + algorithms = [] + # If a load_path is provided, try loading weights from the provided path + if self._train_config.get("load_path"): + self._train_config.model.pretrained = False + else: + self._train_config.model.pretrained = True + + model, algorithm = self._load_weights_and_attach_masks(tokenizer) + if algorithm: + algorithms.append(algorithm) + + optimizer = build_optimizer(self._train_config.optimizer, model) + scheduler = build_scheduler(self._train_config.scheduler) + + loggers = [ + build_logger(name, logger_cfg) + for name, logger_cfg in (self._train_config.get("loggers") or {}).items() + ] + + train_loader = self._build_dataloaders( + self._train_config.train_loader, + tokenizer, + self._train_config.device_train_batch_size, + ) + eval_loader = Evaluator( + label="eval", + dataloader=self._build_dataloaders( + self._train_config.eval_loader, + tokenizer, + self._train_config.device_eval_batch_size, + ), + metric_names=list(model.train_metrics.keys()), + ) + + trainer = Trainer( + run_name=self._train_config.run_name, + model=model, + train_dataloader=train_loader, + eval_dataloader=[eval_loader], + optimizers=optimizer, + schedulers=scheduler, + loggers=loggers, + algorithms=algorithms, + max_duration=self._train_config.max_duration, + eval_interval=self._train_config.eval_interval, + precision=self._train_config.precision, + fsdp_config=self._get_fsdp_config(), + save_folder=self._train_config.save_folder, + eval_subset_num_batches=self._train_config.get( + "eval_subset_num_batches", -1 + ), + log_to_console=self._train_config.get("log_to_console", False), + progress_bar=self._train_config.get("progress_bar", True), + console_log_interval=self._train_config.get("console_log_interval", "1ba"), + device_train_microbatch_size=self._train_config.get( + "device_train_microbatch_size", "auto" + ), + save_filename=self._train_config.get( + "save_filename", "ep{epoch}-ba{batch}-rank{rank}.pt" + ), + save_latest_filename=self._train_config.get( + "save_latest_filename", "latest-rank{rank}.pt" + ), + save_interval=self._train_config.get("save_interval", "1000ba"), + save_num_checkpoints_to_keep=self._train_config.get( + "save_num_checkpoints_to_keep", 1 + ), + save_overwrite=self._train_config.get("save_overwrite", False), + autoresume=self._train_config.get("autoresume", False), + dist_timeout=self._train_config.get("dist_timeout", 600.0), + ) + return trainer + + def fine_tune(self): + """ + Run finetuning using the trainer object. Finetuned models will be checkpointed + to the coonfigured directory. + """ + trainer = self._build_trainer() + trainer.fit() + + +@click.command() +@click.option("--yaml", default=None, type=str, help="Path to the training yaml") +@click.option( + "--checkpoints", + default=None, + type=str, + help="Path to directory to store checkpoints", +) +@click.option("--logging", default=None, type=str, help="Path to store log") +def parse_args_and_run( + yaml: Union[str, Path], + checkpoints: Union[str, Path], + logging: Union[str, Path], +): + """ + Serves as the entrypoint for ddp LLM finetuning. 
+ + :param yaml: path to the llmfoundry compliant yaml file + :param checkpoints: path to log the checkpoints for the model + :param logging: path to store the specified logger (such as tensorboard) + """ + finetuner = FineTuner(yaml, checkpoints, logging) + finetuner.fine_tune() + + +# train_hook +def main(**kwargs): + finetuner = FineTuner(kwargs["yaml"], kwargs["checkpoints"], kwargs["logging"]) + finetuner.fine_tune() diff --git a/src/sparsify/auto/tasks/finetune/helpers.py b/src/sparsify/auto/tasks/finetune/helpers.py new file mode 100644 index 00000000..b0bfaa47 --- /dev/null +++ b/src/sparsify/auto/tasks/finetune/helpers.py @@ -0,0 +1,62 @@ +# Copyright (c) 2021 - present / Neuralmagic, Inc. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import torch + +from composer.core import Algorithm, Event + + +all = ["attach_masks", "MaskPrunedWeights"] + + +class MaskPrunedWeights(Algorithm): + """ + Composer specific hook which allows us to mask weights after a specific event, + in this case at the end of the batch. Provided as input to the Trainer while + finetuning. Note: can also mask weights before the forward pass by adding + `or event == Event.BATCH_START` + """ + + def match(self, event, state): + return event == Event.BATCH_END + + @torch.no_grad() + def apply(self, event, state, logger): + def mask_weights(module): + if hasattr(module, "constant_pruning_mask"): + module.weight *= module.constant_pruning_mask + + state.model.apply(mask_weights) + + +def attach_masks(model: torch.nn.Module): + """ + Recursively attach masks to weights which have already been pruned to avoid + finetuning them further. + + :param model: torch.nnn.Module to recursively attach masks to if the weights are + already pruned + """ + for _, module in model.named_children(): + if isinstance(module, torch.nn.Linear): + constant_pruning_mask = torch.where( + module.weight == 0, + torch.tensor(0, dtype=torch.uint8), + torch.tensor(1, dtype=torch.uint8), + ) + module.register_buffer( + "constant_pruning_mask", constant_pruning_mask, persistent=False + ) + else: + attach_masks(module) diff --git a/src/sparsify/auto/tasks/finetune/runner.py b/src/sparsify/auto/tasks/finetune/runner.py new file mode 100644 index 00000000..5fe8d06a --- /dev/null +++ b/src/sparsify/auto/tasks/finetune/runner.py @@ -0,0 +1,75 @@ +# Copyright (c) 2021 - present / Neuralmagic, Inc. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
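The constant-mask helpers above are the core of how already-pruned weights stay pruned while finetuning proceeds. As a clarifying aside, here is a minimal, hypothetical torch-only sketch (independent of composer and not part of this diff) of the same idea; the tensor names are illustrative only:

```python
# Hypothetical, self-contained sketch: plain-torch view of what
# attach_masks + MaskPrunedWeights accomplish during finetuning.
import torch

linear = torch.nn.Linear(4, 4, bias=False)
with torch.no_grad():
    # pretend roughly half of the weights were pruned in an earlier sparsification run
    linear.weight[linear.weight.abs() < linear.weight.abs().median()] = 0.0

# attach_masks registers a buffer like this on every already-pruned Linear layer
mask = (linear.weight != 0).to(torch.uint8)
linear.register_buffer("constant_pruning_mask", mask, persistent=False)

optimizer = torch.optim.SGD(linear.parameters(), lr=0.1)
loss = linear(torch.randn(8, 4)).pow(2).mean()
loss.backward()
optimizer.step()  # an unmasked update would regrow the pruned (zeroed) weights

with torch.no_grad():
    # MaskPrunedWeights.apply does this for the whole model at every BATCH_END
    linear.weight *= linear.constant_pruning_mask

assert torch.all(linear.weight[mask == 0] == 0)  # sparsity pattern preserved
```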
+ +from typing import Tuple + +from pydantic import BaseModel +from sparsify.auto.tasks.finetune.args import FineTuneTrainArgs +from sparsify.auto.tasks.finetune.finetune import main as train_hook +from sparsify.auto.tasks.runner import TaskRunner +from sparsify.auto.utils import HardwareSpecs +from sparsify.schemas import Metrics, SparsificationTrainingConfig +from sparsify.utils import TASK_REGISTRY + + +__all__ = [ + "LLMFinetuner", +] + + +@TaskRunner.register_task(task=TASK_REGISTRY["finetune"]) +class LLMFinetuner(TaskRunner): + """ + TaskRunner for LLM finetuning. Currently set-up as a shell to leverage TaskRunner's + ddp functionality for finetuning. Function definitions will be completed as + functionality is further supported. + """ + + train_hook = staticmethod(train_hook) + export_model_kwarg = "None" + + def __init__(self, config: SparsificationTrainingConfig): + super().__init__(config) + + @classmethod + def config_to_args( + cls, config: SparsificationTrainingConfig + ) -> Tuple[BaseModel, BaseModel]: + train_args = FineTuneTrainArgs(yaml=config.dataset) + + return train_args, None + + def update_run_directory_args(self): + pass + + def _train_completion_check(self) -> bool: + pass + + def _export_completion_check(self) -> bool: + pass + + def _update_train_args_post_failure(self, error_type: Exception): + pass + + def _update_export_args_post_failure(self, error_type: Exception): + pass + + def _get_metrics(self) -> Metrics: + pass + + def _get_default_deployment_directory(self, train_directory: str) -> str: + pass + + def tune_args_for_hardware(self, hardware_specs: HardwareSpecs): + pass diff --git a/src/sparsify/auto/tasks/image_classification/args.py b/src/sparsify/auto/tasks/image_classification/args.py index 86b59013..a53c5896 100644 --- a/src/sparsify/auto/tasks/image_classification/args.py +++ b/src/sparsify/auto/tasks/image_classification/args.py @@ -158,3 +158,10 @@ class ImageClassificationExportArgs(_ImageClassificationBaseArgs): num_classes: Optional[int] = Field( default=None, description="number of classes for model load/export" ) + convert_qat: bool = Field( + default=True, + description=( + "if True, exports of torch QAT graphs will be converted to a fully " + "quantized representation. 
Default is True" + ), + ) diff --git a/src/sparsify/auto/tasks/image_classification/runner.py b/src/sparsify/auto/tasks/image_classification/runner.py index b7bd486e..665c1954 100644 --- a/src/sparsify/auto/tasks/image_classification/runner.py +++ b/src/sparsify/auto/tasks/image_classification/runner.py @@ -77,6 +77,7 @@ def config_to_args( if "dataset" not in config.kwargs: # custom datasets are set to imagefolder config.kwargs["dataset"] = "imagefolder" + if "model_tag" not in config.kwargs: config.kwargs["model_tag"] = "sparsify_auto_image_classification" train_args = ImageClassificationTrainArgs( diff --git a/src/sparsify/auto/tasks/object_detection/yolov5/args.py b/src/sparsify/auto/tasks/object_detection/yolov5/args.py index 8d766ef8..802b48ad 100644 --- a/src/sparsify/auto/tasks/object_detection/yolov5/args.py +++ b/src/sparsify/auto/tasks/object_detection/yolov5/args.py @@ -46,7 +46,7 @@ class _Yolov5BaseTrainArgs(BaseArgs): noautoanchor: bool = Field(default=False, description="disable AutoAnchor") bucket: str = Field(default="", description="gsutil bucket") cache: str = Field( - default="ram", description='--cache images in "ram" (default) or "disk"' + default="disk", description='--cache images in "ram" or "disk" (default)' ) image_weights: bool = Field( default=False, description="use weighted image selection for training" diff --git a/src/sparsify/auto/tasks/object_detection/yolov5/runner.py b/src/sparsify/auto/tasks/object_detection/yolov5/runner.py index bb29f067..5f1784c8 100644 --- a/src/sparsify/auto/tasks/object_detection/yolov5/runner.py +++ b/src/sparsify/auto/tasks/object_detection/yolov5/runner.py @@ -27,7 +27,7 @@ from sparseml.yolov5.scripts import train as train_hook from sparsify.auto.tasks.object_detection.yolov5 import Yolov5ExportArgs from sparsify.auto.tasks.runner import DDP_ENABLED, TaskRunner -from sparsify.auto.utils import HardwareSpecs +from sparsify.auto.utils import HardwareSpecs, create_yolo_data_yaml from sparsify.schemas import Metrics, SparsificationTrainingConfig from sparsify.utils import TASK_REGISTRY from yolov5.models.experimental import attempt_load @@ -80,11 +80,12 @@ def config_to_args( :param config: training config to generate run for :return: tuple of training and export arguments """ + dataset = create_yolo_data_yaml(config.dataset) train_args = Yolov5TrainArgs( weights=config.base_model, recipe=config.recipe, recipe_args=config.recipe_args, - data=config.dataset, + data=dataset, **config.kwargs, ) diff --git a/src/sparsify/auto/tasks/runner.py b/src/sparsify/auto/tasks/runner.py index ee48b264..6861d753 100644 --- a/src/sparsify/auto/tasks/runner.py +++ b/src/sparsify/auto/tasks/runner.py @@ -12,10 +12,11 @@ # See the License for the specific language governing permissions and # limitations under the License. 
- import gc import json +import logging import os +import pkgutil import shutil import socket import warnings @@ -29,7 +30,7 @@ from pydantic import BaseModel from sparsify.auto.utils import ErrorHandler, HardwareSpecs, analyze_hardware from sparsify.schemas import Metrics, SparsificationTrainingConfig -from sparsify.utils import TASK_REGISTRY, TaskName +from sparsify.utils import TASK_REGISTRY, TaskName, get_task_info __all__ = [ @@ -41,7 +42,9 @@ "TaskRunner", ] -DDP_ENABLED = not (os.environ.get("NM_AUTO_DISABLE_DDP", False)) +DDP_ENABLED = ( + not (os.environ.get("NM_AUTO_DISABLE_DDP", False)) and torch.cuda.is_available() +) MAX_RETRY_ATTEMPTS = os.environ.get("NM_MAX_SCRIPT_RETRY_ATTEMPTS", 3) # default: 3 MAX_MEMORY_STEPDOWNS = os.environ.get("NM_MAX_SCRIPT_MEMORY_STEPDOWNS", 10) SUPPORTED_TASKS = [ @@ -52,9 +55,12 @@ "question_answering", "text_classification", "token_classification", + "finetune", ] ] _TASK_RUNNER_IMPLS = {} +_LOGGER = logging.getLogger(__name__) +_LOGGER.setLevel(logging.INFO) # set at top level to modify later def retry_stage(stage: str): @@ -148,7 +154,6 @@ def __init__(self, config: SparsificationTrainingConfig): self.dashed_cli_kwargs = False # True if CLI args require "-" as word separator self.train_args, self.export_args = self.config_to_args(self.config) - self.hardware_specs = analyze_hardware() self.tune_args_for_hardware(self.hardware_specs) @@ -266,10 +271,12 @@ def _train_distributed(self): "--nproc_per_node", "auto", f"--master_port={_get_open_port_()}", - self.sparseml_train_entrypoint, ] + if self._config.task in get_task_info("finetune").aliases: + ddp_args += ["finetune"] + else: + ddp_args += [self.sparseml_train_entrypoint] ddp_args += self.train_args.serialize_to_cli_string(self.dashed_cli_kwargs) - launch_ddp(ddp_args) @retry_stage(stage="train") @@ -289,6 +296,10 @@ def train(self, train_directory: str, log_directory: str) -> Metrics: self.log_directory = log_directory self.update_run_directory_args() + if self._config.task in get_task_info("finetune").aliases: + self.train_args.checkpoints = self.run_directory + self.train_args.logging = self.log_directory + if self.use_distributed_training: self._train_distributed() else: @@ -392,20 +403,26 @@ def create_deployment_directory(self, train_directory: str, deploy_directory: st """ Creates and/or moves deployment directory to the deployment directory for the mode corresponding to the trial_idx - + :post-condition: The deployment artifacts will be moved from + origin_directory to deploy_directory :param train_directory: directory to grab the exported files from :param deploy_directory: directory to save the deployment files to """ origin_directory = self._get_default_deployment_directory(train_directory) - + _LOGGER.info("Moving %s to %s" % (origin_directory, deploy_directory)) for filename in os.listdir(origin_directory): source_file = os.path.join(origin_directory, filename) target_file = os.path.join(deploy_directory, filename) shutil.move(source_file, target_file) + + _LOGGER.info("Deleting %s" % origin_directory) shutil.rmtree(origin_directory) - with open(os.path.join(deploy_directory, "readme.txt"), "x") as f: - f.write("deployment instructions will go here") + readme_path = os.path.join(deploy_directory, "README.md") + instruc = pkgutil.get_data("sparsify.auto", "tasks/deployment_instructions.md") + with open(readme_path, "wb") as f: + f.write(instruc) + _LOGGER.info("Deployment directory moved to %s" % deploy_directory) @abstractmethod def _train_completion_check(self) -> bool: @@ 
-495,7 +512,8 @@ def _dynamically_register_integration_runner(task: str): from sparsify.auto.tasks.image_classification import ( # noqa F401 ImageClassificationRunner, ) - + elif TASK_REGISTRY[task].domain == "llm": + from sparsify.auto.tasks.finetune import LLMFinetuner # noqa F401 else: raise ValueError( f"Task {task} is not yet supported. TaskRunner implementation " diff --git a/src/sparsify/auto/tasks/transformers/__init__.py b/src/sparsify/auto/tasks/transformers/__init__.py index b8794631..a3f3f4ca 100644 --- a/src/sparsify/auto/tasks/transformers/__init__.py +++ b/src/sparsify/auto/tasks/transformers/__init__.py @@ -15,5 +15,17 @@ # flake8: noqa # isort: skip_file + +def _check_nm_install(): + try: + from .runner import * + except ImportError as exception: + raise ImportError( + "Please install sparsify[nm] to use this pathway." + ) from exception + + +_check_nm_install() + from .args import * from .runner import * diff --git a/src/sparsify/auto/tasks/transformers/args.py b/src/sparsify/auto/tasks/transformers/args.py index 3ae4ed62..268cddb3 100644 --- a/src/sparsify/auto/tasks/transformers/args.py +++ b/src/sparsify/auto/tasks/transformers/args.py @@ -697,7 +697,7 @@ class TransformersExportArgs(BaseArgs): description="Sequence length to use. Default is 384. Can be overwritten later", ) no_convert_qat: bool = Field( - default=True, + default=False, description=( "Set flag to not perform QAT to fully quantized conversion after export" ), diff --git a/src/sparsify/auto/tasks/transformers/runner.py b/src/sparsify/auto/tasks/transformers/runner.py index 2a7451e6..20f8a154 100644 --- a/src/sparsify/auto/tasks/transformers/runner.py +++ b/src/sparsify/auto/tasks/transformers/runner.py @@ -15,7 +15,9 @@ import json import math import os -from typing import Tuple +import re +import warnings +from typing import Tuple, Union import onnx @@ -67,11 +69,23 @@ def config_to_args( :param config: training config to generate run for :return: tuple of training and export arguments """ + dataset_name, data_file_args = cls.parse_data_args(config.dataset) + config.kwargs.update(data_file_args) + + if config.task == TASK_REGISTRY.get("text_classification") and ( + dataset_name in _GLUE_TASK_NAMES + ): + # text classification GLUE datasets need special treatment + # since the proper dataset names are set as "task" with + # the top level dataset as "glue" + config.kwargs["task_name"] = dataset_name + dataset_name = "glue" + train_args = cls.train_args_class( model_name_or_path=config.base_model, recipe=config.recipe, recipe_args=config.recipe_args, - dataset_name=config.dataset, + dataset_name=dataset_name, distill_teacher=config.distill_teacher if not config.distill_teacher == "off" else "disable", @@ -84,6 +98,80 @@ def config_to_args( return train_args, export_args + @classmethod + def parse_data_args(cls, dataset: str) -> Tuple[Union[str, None], dict]: + """ + Check if the dataset provided is a data directory. If it is, update the train, + test and validation file arguments with the approriate filepaths. This function + assumes any file containing the substrings "train", "test", or "val" are the + data files expected to be used. Duplicates will be updated to only use one file + path. Also, existing kwargs for train, test and validation files will be + overwritten if directory is provided. + + Example directory structure: + - data_for_training/ + - some_train_file.json + - some_validation_file.json + - test_dir/ + - some_test_file.json + + :params dataset: inputted data string arg. 
Assumed to either be a dataset which + can be downloaded publicly or a locally available directory containing + data files. + + :returns: updated dataset, train_file, test_file, and validation_file args + """ + data_file_args = {} + + def _check_and_update_file(root: str, current_file: str, file_type: str): + split_type = file_type.split("_")[0] + + if data_file_args.get(file_type, None): + warnings.warn( + f"A {split_type} file was already found with name " + f"{data_file_args[file_type]}. Updating with {current_file} " + ) + + if not current_file.lower().endswith(("json", "csv")): + warnings.warn( + f"Found {split_type} file named {current_file} " + "with incorrect file type (expected: json or csv). Skipping file." + ) + else: + data_file_args[file_type] = os.path.join(root, current_file) + + if os.path.isdir(dataset): + for root, _, files in os.walk(dataset): + for f in files: + if re.search(r"train", f): + _check_and_update_file(root, f, "train_file") + elif re.search(r"val", f): + _check_and_update_file(root, f, "validation_file") + elif re.search(r"test", f): + _check_and_update_file(root, f, "test_file") + + if ( + data_file_args.get("train_file", None) + and data_file_args.get("validation_file", None) + and data_file_args.get("test_file", None) + ): + break + + if not ( + data_file_args.get("train_file", None) + and data_file_args.get("validation_file", None) + ): + raise Exception( + "No training or validation files found. Be sure the " + "directory provided to the data arg contains json or csv " + "files with the train and val substrings in the filenames." + ) + + if data_file_args: + dataset = None + + return dataset, data_file_args + def tune_args_for_hardware(self, hardware_specs: HardwareSpecs): """ Update run args based on detected hardware specifications @@ -251,6 +339,23 @@ class QuestionAnsweringRunner(_TransformersRunner): } +# https://huggingface.co/datasets/glue +_GLUE_TASK_NAMES = { + "ax", + "cola", + "mnli", + "mnli_matched", + "mnli_mismatched", + "mrpc", + "qnli", + "qqp", + "rte", + "sst2", + "stsb", + "wnli", +} + + def _load_model_on_task(model_name_or_path, model_type, task, **model_kwargs): load_funcs = { "masked_language_modeling": SparseAutoModel.masked_language_modeling_from_pretrained, # noqa diff --git a/src/sparsify/auto/utils/error_handler.py b/src/sparsify/auto/utils/error_handler.py index 65bc533c..7240fa68 100644 --- a/src/sparsify/auto/utils/error_handler.py +++ b/src/sparsify/auto/utils/error_handler.py @@ -154,7 +154,7 @@ def raise_exception_summary(self): if all( [ ( - (type(error) == type(first_error)) + (type(error) is type(first_error)) and (error.args == first_error.args) ) for error in self._caught_runtime_errors diff --git a/src/sparsify/auto/utils/helpers.py b/src/sparsify/auto/utils/helpers.py index 49b1d22d..d371624e 100644 --- a/src/sparsify/auto/utils/helpers.py +++ b/src/sparsify/auto/utils/helpers.py @@ -15,8 +15,10 @@ """ Generic helpers for sparsify.auto """ +import glob import logging import os +import re from collections import OrderedDict from datetime import datetime from typing import Any, Dict, List, Tuple, Union @@ -32,6 +34,7 @@ "load_raw_config_history", "best_n_trials_from_history", "initialize_banner_logger", + "create_yolo_data_yaml", ] SAVE_DIR = "{{run_mode}}_{{task}}{:_%Y_%m_%d_%H_%M_%S}".format(datetime.now()) @@ -47,6 +50,109 @@ def initialize_banner_logger(): logger.addHandler(handler) +def create_yolo_data_yaml(dataset: str) -> str: + """ + Check if the dataset provided is a data directory. 
If it is, check if there is + a yaml file within the directory and return the path to the yaml. If not, build + a yolov5 yaml file based on the provided data directory path. An example of the + directory structure for the provided directory path is shown below. There must + subdirectories in the provided directory named `images`, `labels` and a text + file called `classes.txt` which includes the list of the classes for the + particular dataset, ordered by class id. The `images` and `labels` folders + should contain identically named train, test, and validation data folder. + For details on what images and labels should look like, please see the yolov5 + repository: https://github.com/ultralytics/yolov5/tree/master. + + Example directory structure: + - data_for_training/ + - labels/ + - train/ + - val/ + - test/ + - images/ + - train/ + - val/ + - test/ + - classes.txt + + :params dataset: inputted data string arg. Assumed to either be a dataset which + can be downloaded publicly or a locally available directory containing + data files. + + :returns: path to yaml to download or the newly built yaml. If the data string + arg is a yaml for a publicly available dataset, this function will return the + same string. Otherwise, the path to the newly generated yaml will be returned. + """ + data_file_args = {} + image_dir = "images" + class_path = "classes.txt" + yaml_path = "data_local.yaml" + + def _check_and_update_file(file_type: str, path: str): + if data_file_args.get(file_type, None): + data_file_args[file_type].append(path) + else: + data_file_args[file_type] = [path] + + # Case where the user provides just a yaml file path + if not os.path.isdir(dataset): + return dataset + + # Case where the user provides a data directory with a yaml file + # Only one will be returned if multiple are provided + yaml_paths = glob.glob(f"{dataset}/*.y*ml") + if len(yaml_paths) > 0: + return yaml_paths[0] + + image_path = os.path.join(dataset, image_dir) + class_list_path = os.path.join(dataset, class_path) + + if not os.path.exists(image_path): + raise ValueError( + f"The the provided directory path {dataset} " + "does not contain a folder called `images`. A subdirectory must " + "exist which contains the data folders." + ) + + if not os.path.exists(class_list_path): + raise ValueError( + f"The the provided directory path {dataset} " + "does not contain a classes.txt file. A file must be " + "present which includes a list of the classes for the dataset." + ) + + data_file_args["path"] = dataset + + for d in os.listdir(image_path): + current_path = os.path.join(image_dir, d) + if re.search(r"train", d): + _check_and_update_file("train", current_path) + elif re.search(r"val", d): + _check_and_update_file("val", current_path) + elif re.search(r"test", d): + _check_and_update_file("test", current_path) + + if not (data_file_args.get("train") and data_file_args.get("val")): + raise Exception( + "No training or validation folders found. Be sure the " + "directory provided to the data arg contains folders " + "with the train and val substrings in the filenames." 
+ ) + + # Store the newly generated yaml in the same directory as the data + dataset = os.path.join(dataset, yaml_path) + + with open(class_list_path, "r") as f: + class_list = f.readlines() + + classes = {idx: label.strip() for idx, label in enumerate(class_list)} + + with open(dataset, "w") as f: + yaml.safe_dump({**data_file_args, "names": classes}, f, sort_keys=False) + + return dataset + + def create_save_directory(api_args: "APIArgs") -> Tuple[str]: # noqa: F821 """ Create base save directory structure for a single sparsify.auto run diff --git a/src/sparsify/check_environment/__init__.py b/src/sparsify/check_environment/__init__.py new file mode 100644 index 00000000..05666a99 --- /dev/null +++ b/src/sparsify/check_environment/__init__.py @@ -0,0 +1,20 @@ +# Copyright (c) 2021 - present / Neuralmagic, Inc. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# flake8: noqa +# isort: skip_file + +from .gpu_device import * +from .ort_health import * +from .pathway_checks import * diff --git a/src/sparsify/check_environment/gpu_device.py b/src/sparsify/check_environment/gpu_device.py new file mode 100644 index 00000000..e7d57540 --- /dev/null +++ b/src/sparsify/check_environment/gpu_device.py @@ -0,0 +1,39 @@ +# Copyright (c) 2021 - present / Neuralmagic, Inc. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import logging + +import torch + + +_LOGGER = logging.getLogger(__name__) + +__all__ = ["check_for_gpu"] + + +def check_for_gpu(): + """ + Check for GPU and warn if not found + """ + _LOGGER.warning("Checking for GPU...") + if not torch.cuda.is_available(): + _LOGGER.warn( + "*************************** NO GPU DETECTED ***************************\n" + "No GPU(s) detected on machine. The use of a GPU for training-aware " + "sparsification, sparse-transfer learning, and one-shot sparsification is " + "highly recommended.\n" + "************************************************************************" + ) + else: + _LOGGER.warning("GPU check completed successfully") diff --git a/src/sparsify/check_environment/main.py b/src/sparsify/check_environment/main.py new file mode 100644 index 00000000..0e88906f --- /dev/null +++ b/src/sparsify/check_environment/main.py @@ -0,0 +1,26 @@ +# Copyright (c) 2021 - present / Neuralmagic, Inc. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from sparsify.check_environment import check_for_gpu, check_ort_health + + +def main(): + """ + Check the environment for compatibility with the sparsifyml package + """ + check_for_gpu() + check_ort_health() + + +if __name__ == "__main__": + main() diff --git a/src/sparsify/check_environment/ort_health.py b/src/sparsify/check_environment/ort_health.py new file mode 100644 index 00000000..03ad9f9f --- /dev/null +++ b/src/sparsify/check_environment/ort_health.py @@ -0,0 +1,183 @@ +# Copyright (c) 2021 - present / Neuralmagic, Inc. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +import logging +import signal +from typing import List, Optional + +import numpy +import torch +from onnx import TensorProto, helper + +import onnxruntime as ort +from deepsparse.utils import generate_random_inputs, get_input_names +from sparsify.login import import_sparsifyml_authenticated + + +import_sparsifyml_authenticated() +from sparsifyml.one_shot.utils import run_onnx_model # noqa: E402 + + +__all__ = ["check_ort_health"] + +_LOGGER = logging.getLogger(__name__) + + +CUDA_HELP_STRING = ( + "If you would like to run on GPU, please ensure that your CUDA and cuDNN " + "versions are compatible with the installed version of onnxruntime-gpu: " + "https://onnxruntime.ai/docs/execution-providers/CUDA-ExecutionProvider.html#requirements" # noqa: E501 +) + + +def _create_simple_conv_graph( + image_pixels_side: int = 32, + channel_count: int = 3, + batch_size: int = 1, + kernel_size: int = 3, + kernel_count: int = 10, +): + feature_size_side = image_pixels_side - kernel_size + 1 + + # The inputs and outputs + X = helper.make_tensor_value_info( + "X", + TensorProto.FLOAT, + [batch_size, channel_count, image_pixels_side, image_pixels_side], + ) + Y = helper.make_tensor_value_info( + "Y", + TensorProto.FLOAT, + [batch_size, kernel_count, feature_size_side, feature_size_side], + ) + + # Create nodes for Conv, Relu, Flatten, and Gemm (Fully Connected) operations + conv_node = helper.make_node( + "Conv", + inputs=["X", "conv_weight", "conv_bias"], + outputs=["conv_result"], + kernel_shape=[kernel_size, kernel_size], + ) + + relu_node1 = helper.make_node( + "Relu", + inputs=["conv_result"], + outputs=["Y"], + ) + + # Define the weights for the Conv and Gemm layers + conv_weight = helper.make_tensor( + "conv_weight", + TensorProto.FLOAT, + [kernel_count, channel_count, kernel_size, kernel_size], + numpy.random.randn(kernel_count, channel_count, kernel_size, kernel_size), + ) + conv_bias = helper.make_tensor( + "conv_bias", TensorProto.FLOAT, [kernel_count], numpy.random.randn(kernel_count) + ) + + 
# Create the graph (model) + + graph_def = helper.make_graph( + [conv_node, relu_node1], + "SimpleCNN", + inputs=[X], + outputs=[Y], + initializer=[conv_weight, conv_bias], + ) + + return helper.make_model(graph_def, producer_name="onnx-example") + + +def check_ort_health(providers: Optional[List[str]] = None): + """ + Checks that the model can be executed with the set providers + + :param model: model to check + :param providers: list of providers use for ORT execution + """ + _LOGGER.warning("Checking onnxruntime-gpu environment health...") + + model = _create_simple_conv_graph() + + providers = ( + ["CUDAExecutionProvider"] + if torch.cuda.is_available() + else ["CPUExecutionProvider"] + ) + + # If cuda device found by torch, ensure it's found by ORT as well + if ort.get_device() != "GPU" and "CUDAExecutionProvider" in providers: + raise RuntimeError( + "CUDA enabled device detected on your machine, but is not detected by " + "onnxruntime. If you would like to run on CPU, please set " + "CUDA_VISIBLE_DEVICES=-1. Note that this is likely to slow down model " + f"compression significantly. {CUDA_HELP_STRING}" + ) + + # Ensure that ORT can execute the model + random_input = { + input_name: input + for input_name, input in zip( + get_input_names(model), generate_random_inputs(model) + ) + } + + # Define a custom exception and signal handler + class _TerminationSignal(Exception): + pass + + def handle_termination_signal(signum, frame): + raise _TerminationSignal("Termination signal received") + + # Register the signal handler for SIGTERM and SIGINT signals + signal.signal(signal.SIGTERM, handle_termination_signal) + signal.signal(signal.SIGINT, handle_termination_signal) + + try: + run_onnx_model( + model=model, + input_batch=random_input, + providers=providers, + ) + except _TerminationSignal as ts: + print("Termination signal caught:", ts) + except Exception as e: + # If run fails, try again with CPU only to ensure this is a CUDA environment + # issue + if providers != ["CPUExecutionProvider"]: + try: + run_onnx_model( + model=model, + input_batch=random_input, + providers=["CPUExecutionProvider"], + ) + + raise RuntimeError( + "ONNXRuntime execution failed with CUDAExecutionProvider" + "but succeeded with CPUExecutionProvider. This is indicative" + f"of a likely issue with nnxruntime-gpu install {CUDA_HELP_STRING}" + ) from e + + except RuntimeError: + pass + + raise RuntimeError( + "ONNXRuntime execution failed with both CUDAExecutionProvider and " + "CPUExecutionProvider. Ensure that onnxruntime-gpu and its dependencies " + "are properly installed." + ) from e + + _LOGGER.warning("onnxruntime-gpu environment check completed successfully") diff --git a/src/sparsify/check_environment/pathway_checks.py b/src/sparsify/check_environment/pathway_checks.py new file mode 100644 index 00000000..2afb6e64 --- /dev/null +++ b/src/sparsify/check_environment/pathway_checks.py @@ -0,0 +1,35 @@ +# Copyright (c) 2021 - present / Neuralmagic, Inc. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. + + +from sparsify.check_environment import check_for_gpu, check_ort_health + + +__all__ = ["one_shot_checks", "auto_checks"] + + +def one_shot_checks(): + """ + Check environment for compatibility with one-shot sparsification + """ + check_for_gpu() + check_ort_health() + + +def auto_checks(): + """ + Check environment for compatibility with training-aware sparsification and + sparse-transfer learning + """ + check_for_gpu() diff --git a/src/sparsify/cli/opts.py b/src/sparsify/cli/opts.py index d3dc7599..9efc78e0 100644 --- a/src/sparsify/cli/opts.py +++ b/src/sparsify/cli/opts.py @@ -12,7 +12,9 @@ # See the License for the specific language governing permissions and # limitations under the License. +import logging import os +from functools import partial import click from sparsify.utils.constants import TASK_REGISTRY @@ -20,7 +22,6 @@ __all__ = [ "EXPERIMENT_TYPE", - "USE_CASE", "PROJECT_ID", "EXPERIMENT_ID", "WORKING_DIR", @@ -42,12 +43,14 @@ "add_optim_opts", ] +_LOGGER = logging.getLogger(__name__) + _EXPERIMENT_TYPES = ["sparse-transfer", "one-shot", "training-aware"] _EVAL_METRICS = ["accuracy", "mAP", "recall", "f1"] # TODO: add back kl _DEPLOY_ENGINES = ["deepsparse", "onnxruntime"] -def validate_use_case(ctx, param, value): +def validate_use_case(ctx, param, value, strict: bool = True): # click validator for --use-case # task_name: TaskName @@ -55,9 +58,18 @@ def validate_use_case(ctx, param, value): # TaskName __eq__ matches against aliases and str standardization if value == task_name: return value - raise ValueError( - f"Unknown use-case {value}, supported use cases: {list(TASK_REGISTRY.keys())}" - ) + + if strict: + raise ValueError( + f"Unknown use-case {value}, supported use cases: " + f"{list(TASK_REGISTRY.keys())}" + ) + else: + _LOGGER.warning( + f"Unknown use-case {value}, full feature set may not be availble for " + "custom use cases" + ) + return value EXPERIMENT_TYPE = click.option( @@ -66,13 +78,6 @@ def validate_use_case(ctx, param, value): type=click.Choice(_EXPERIMENT_TYPES, case_sensitive=False), help="The type of the experiment to run", ) -USE_CASE = click.option( - "--use-case", - required=True, - type=str, - callback=validate_use_case, - help="The task this model is for", -) PROJECT_ID = click.option( "--project-id", default=None, @@ -153,10 +158,21 @@ def validate_use_case(ctx, param, value): TRAIN_KWARGS = click.option("--train-kwargs", default=None, type=str) -def add_info_opts(f): - for fn in [WORKING_DIR, EXPERIMENT_ID, PROJECT_ID, USE_CASE]: - f = fn(f) - return f +def add_info_opts(*, require_known_use_case=True): + use_case = click.option( + "--use-case", + required=True, + type=str, + callback=partial(validate_use_case, strict=require_known_use_case), + help="The task this model is for", + ) + + def wrapped(f): + for fn in [WORKING_DIR, EXPERIMENT_ID, PROJECT_ID, use_case]: + f = fn(f) + return f + + return wrapped def add_model_opts(*, require_model: bool, include_optimizer: bool = False): diff --git a/src/sparsify/cli/run.py b/src/sparsify/cli/run.py index fc19c5b7..b1d28d4e 100644 --- a/src/sparsify/cli/run.py +++ b/src/sparsify/cli/run.py @@ -17,6 +17,8 @@ from pathlib import Path import click +from sparsezoo import Model +from sparsify.check_environment import auto_checks, one_shot_checks from sparsify.cli import opts @@ -32,7 +34,7 @@ def main(): @main.command() -@opts.add_info_opts +@opts.add_info_opts(require_known_use_case=False) 
@opts.add_model_opts(require_model=True) @opts.add_data_opts @opts.add_deploy_opts @@ -41,15 +43,19 @@ def one_shot(**kwargs): """ One shot sparsification of ONNX models """ + kwargs["optim_level"] = _validate_optim_level(kwargs.get("optim_level")) + # raises exception if sparsifyml not installed from sparsify.one_shot import one_shot + one_shot_checks() + recipe_args = kwargs.get("recipe_args") if isinstance(recipe_args, str): recipe_args = json.loads(recipe_args) one_shot.one_shot( - model=Path(kwargs["model"]), + model=Path(_maybe_unwrap_zoo_stub(kwargs["model"])), dataset_dir=Path(kwargs["data"]), num_samples=kwargs["train_samples"] or None, deploy_dir=Path(kwargs["working_dir"]), @@ -62,7 +68,7 @@ def one_shot(**kwargs): @main.command() -@opts.add_info_opts +@opts.add_info_opts(require_known_use_case=True) @opts.add_model_opts(require_model=False) @opts.add_data_opts @opts.add_deploy_opts @@ -72,14 +78,18 @@ def sparse_transfer(**kwargs): """ Run sparse transfer learning for a use case against a supported task and model """ + kwargs["optim_level"] = _validate_optim_level(kwargs.get("optim_level")) + from sparsify import auto + auto_checks() + # recipe arg should be a sparse transfer recipe auto.main(_parse_run_args_to_auto(sparse_transfer=True, **kwargs)) @main.command() -@opts.add_info_opts +@opts.add_info_opts(require_known_use_case=True) @opts.add_model_opts(require_model=True) @opts.add_data_opts @opts.add_deploy_opts @@ -89,8 +99,12 @@ def training_aware(**kwargs): """ Run training aware sparsification for a use case against a supported task and model """ + kwargs["optim_level"] = _validate_optim_level(kwargs.get("optim_level")) + from sparsify import auto + auto_checks() + # recipe arg should be a training aware recipe auto.main(_parse_run_args_to_auto(sparse_transfer=False, **kwargs)) @@ -123,5 +137,31 @@ def _parse_run_args_to_auto(sparse_transfer: bool, **kwargs): ) +def _validate_optim_level(optim_level: float) -> float: + """ + :param optim_level: cli ingested optim_level + :return: optim level scaled from 0-1 + :raises ValueError: for any values that are not float 0-1 or an integer 1-100 + """ + # optim level should always be defaulted by the CLI, asserting here for safety + assert optim_level is not None + + if 0 <= optim_level <= 1: + return optim_level + elif (1 < optim_level <= 100) and optim_level == int(optim_level): + return optim_level / 100.0 + else: + raise ValueError( + "optim-level must be a float value between 0-1 or an integer value " + f"between 0-100. Found {optim_level}" + ) + + +def _maybe_unwrap_zoo_stub(model_path: str) -> str: + if model_path.startswith("zoo:"): + return Model(model_path).onnx_model.path + return model_path + + if __name__ == "__main__": main() diff --git a/src/sparsify/login.py b/src/sparsify/login.py index ff212dff..687ad733 100644 --- a/src/sparsify/login.py +++ b/src/sparsify/login.py @@ -99,7 +99,16 @@ def install_sparsifyml(access_token: str) -> None: :param access_token: The access token to use for authentication """ sparsifyml_spec = importlib.util.find_spec("sparsifyml") - sparsifyml = importlib.import_module("sparsifyml") if sparsifyml_spec else None + + try: + sparsifyml = importlib.import_module("sparsifyml") if sparsifyml_spec else None + except ImportError as sparsifyml_import_error: + raise RuntimeError( + "sparsifyml installation detected in current environment, but an " + "exception was raised on import. 
ensure python3-dev is installed " + "for your python version and the `libpython` executable is available then " + f"re-run sparsify.login.\n\n{sparsifyml_import_error}" + ) sparsifyml_installed = ( sparsifyml_spec is not None diff --git a/src/sparsify/schemas/auto_api.py b/src/sparsify/schemas/auto_api.py index d6e94102..c34f68a4 100644 --- a/src/sparsify/schemas/auto_api.py +++ b/src/sparsify/schemas/auto_api.py @@ -160,14 +160,14 @@ class SparsificationTrainingConfig(BaseModel): dataset: str = Field( description="path to the dataset to train the task on", ) - base_model: str = Field( + base_model: Optional[str] = Field( description="path to the model to be sparsified", ) distill_teacher: str = Field( description="optional path to a distillation teacher for training", default="auto", ) - recipe: str = Field( + recipe: Optional[str] = Field( description="file path to or zoo stub of sparsification recipe to be applied", ) recipe_args: Dict[str, Any] = Field( diff --git a/src/sparsify/utils/constants.py b/src/sparsify/utils/constants.py index 3d893325..54771000 100644 --- a/src/sparsify/utils/constants.py +++ b/src/sparsify/utils/constants.py @@ -50,6 +50,12 @@ ] TASK_REGISTRY: Dict[str, TaskName] = { + "finetune": TaskName( + name="finetune", + aliases=["finetuning", "fine tune"], + domain="llm", + sub_domain="language_modeling", + ), "image_classification": TaskName( name="image_classification", aliases=["ic", "classification", "cv_classification"],