From 3aef3f877f13364ea03f2d193a0030be0175a97f Mon Sep 17 00:00:00 2001 From: Rob Greenberg <100797996+rgreenberg1@users.noreply.github.com> Date: Fri, 30 Jun 2023 16:35:12 -0700 Subject: [PATCH 01/47] Update README.md - Updated quick links (#240) * Update README.md - Updated quick links * Update README.md - updated links to other docs. * Update cloud-user-guide.md - fixed links and added in comparison area. * Update README.md - fixed a blob/main * Update cli-api-guide.md - updated links * Update cli-api-guide.md - deleted long long * Quality fixes --------- Co-authored-by: Rahul Tuli --- README.md | 69 +++++++++++---- docs/cli-api-guide.md | 181 +++++++++++++++++++++++++++++++++++++++ docs/cloud-user-guide.md | 43 ++++++++-- 3 files changed, 272 insertions(+), 21 deletions(-) diff --git a/README.md b/README.md index 7f594aaa..cf7cd8a8 100644 --- a/README.md +++ b/README.md @@ -81,7 +81,7 @@ To empower you in compressing models, Sparsify is made up of two components: the The Sparsify Cloud is a web application that allows you to create and manage Sparsify Experiments, explore hyperparameters, predict performance, and compare results across both Experiments and deployment scenarios. The Sparsify CLI/API is a Python package that allows you to run Sparsify Experiments locally, sync with the Sparsify Cloud, and integrate into your own workflows. -To get started immediately, [create an account](https://account.neuralmagic.com/signup) and then check out the [Installation](https://github.com/neuralmagic/sparsify/edit/Sparsify-Alpha-README/README.md#installation) and [Quick Start](https://github.com/neuralmagic/sparsify/edit/Sparsify-Alpha-README/README.md#quick-start) sections of this README. +To get started immediately, [create an account](https://account.neuralmagic.com/signup) and then check out the [Installation](#Installation) and [Quick Start](#quick-start) sections of this README. 
With all of that setup, sparsifying your models is as easy as: ```bash @@ -131,7 +131,7 @@ An account is required to manage your Experiments and API keys. Visit the [Neural Magic's Web App Platform](https://account.neuralmagic.com/signup) and create an account by entering your email, name, and a unique password. If you already have a Neural Magic Account, [sign in](https://account.neuralmagic.com/signin) with your email. -For more details, see the [Sparsify Cloud User Guide](https://github.com/neuralmagic/sparsify/docs/cloud-user-guide.md). +For more details, see the [Sparsify Cloud User Guide](https://github.com/neuralmagic/sparsify/blob/main/docs/cloud-user-guide.md). ### Install Sparsify @@ -155,9 +155,9 @@ Once you have located this, copy the command or the API key itself and run the f sparsify.login API_KEY ```` -For more details on locating the API_KEY, see the [Sparsify Cloud User Guide](https://github.com/neuralmagic/sparsify/docs/cloud-user-guide.md). +For more details on locating the API_KEY, see the [Sparsify Cloud User Guide](https://github.com/neuralmagic/sparsify/blob/main/docs/cloud-user-guide.md). -For more details on the `sparsify.login` command, see the [CLI/API Guide](https://github.com/neuralmagic/sparsify/docs/cli-api-guide.md). +For more details on the `sparsify.login` command, see the [CLI/API Guide](https://github.com/neuralmagic/sparsify/blob/main/docs/cli-api-guide.md). ### Run an Experiment @@ -166,7 +166,7 @@ They are the process of applying sparsification algorithms in One-Shot, Training All Experiments are run locally on your training hardware and can be synced with the cloud for further analysis and comparison. To run an Experiment, you can use either the CLI or the API depending on your use case. The Sparsify Cloud provides a UI for exploring hyperparameters, predicting performance, and generating the desired CLI/API command. 
-For more info on generating commands from the Sparsify Cloud, see the [Sparsify Cloud User Guide](https://github.com/neuralmagic/sparsify/docs/cloud-user-guide.md). +For more info on generating commands from the Sparsify Cloud, see the [Sparsify Cloud User Guide](https://github.com/neuralmagic/sparsify/blob/main/docs/cloud-user-guide.md). The general command for running an Experiment is: @@ -175,11 +175,11 @@ sparsify.run EXPERIMENT_TYPE --use-case USE_CASE --model MODEL --data DATA --opt ``` Where the values for each of the arguments follow these general rules: -- EXPERIMENT_TYPE: one of `one-shot`, `training-aware`, or `sparse-transfer`; see the examples below for more details or the [CLI/API Guide](https://github.com/neuralmagic/sparsify/docs/cli-api-guide.md). -- USE_CASE: the use case you're solving for such as `image-classification`, `object-detection`, `text-classification`, a custom use case, etc. A full list of supported use cases for each Experiment type can be found [here](https://github.com/neuralmagic/sparsify/docs/use-cases-guide.md). -- MODEL: the model you want to sparsify which can be a model name such as `resnet50`, a stub from the [SparseZoo](https://sparsezoo.neuralmagic.com), or a path to a local model. For One-Shot, currently the model must be in an ONNX format. For Training-Aware and Sparse-Transfer, the model must be in a PyTorch format. More details on model formats can be found [here](https://github.com/neuralmagic/sparsify/docs/models-guide.md). -- DATA: the dataset you want to use to the sparsify the model. This can be a dataset name such as `imagenette` or a path to a local dataset. Currently, One-Shot only supports NPZ formatted datasets. Training-Aware and Sparse-Transfer support PyTorch ImageFolder datasets for image classification, YOLOv5/v8 datasets for object detection and segmentation, and HuggingFace datasets for NLP/NLG. 
More details on dataset formats can be found [here](https://github.com/neuralmagic/sparsify/docs/datasets-guide.md). -- OPTIM_LEVEL: the desired sparsification level from 0 (none) to 1 (max). The general rule is that 0 is the baseline model, <0.3 only quantizes the model, 0.3-1.0 increases the sparsity of the model and applies quantization. More details on sparsification levels can be found [here](https://github.com/neuralmagic/sparsify/docs/optim-levels-guide.md). +- EXPERIMENT_TYPE: one of `one-shot`, `training-aware`, or `sparse-transfer`; see the examples below for more details or the [CLI/API Guide](https://github.com/neuralmagic/sparsify/blob/main/docs/cli-api-guide.md). +- USE_CASE: the use case you're solving for such as `image-classification`, `object-detection`, `text-classification`, a custom use case, etc. A full list of supported use cases for each Experiment type can be found [here](https://github.com/neuralmagic/sparsify/blob/main/docs/use-cases-guide.md). +- MODEL: the model you want to sparsify which can be a model name such as `resnet50`, a stub from the [SparseZoo](https://sparsezoo.neuralmagic.com), or a path to a local model. For One-Shot, currently the model must be in an ONNX format. For Training-Aware and Sparse-Transfer, the model must be in a PyTorch format. More details on model formats can be found [here](https://github.com/neuralmagic/sparsify/blob/main/docs/models-guide.md). +- DATA: the dataset you want to use to the sparsify the model. This can be a dataset name such as `imagenette` or a path to a local dataset. Currently, One-Shot only supports NPZ formatted datasets. Training-Aware and Sparse-Transfer support PyTorch ImageFolder datasets for image classification, YOLOv5/v8 datasets for object detection and segmentation, and HuggingFace datasets for NLP/NLG. More details on dataset formats can be found [here](https://github.com/neuralmagic/sparsify/blob/main/docs/datasets-guide.md). 
+- OPTIM_LEVEL: the desired sparsification level from 0 (none) to 1 (max). The general rule is that 0 is the baseline model, <0.3 only quantizes the model, 0.3-1.0 increases the sparsity of the model and applies quantization. More details on sparsification levels can be found [here](https://github.com/neuralmagic/sparsify/blob/main/docs/optim-levels-guide.md). #### Running One-Shot @@ -188,7 +188,7 @@ Where the values for each of the arguments follow these general rules: | **++** | **+++++** | **+++** | One-Shot Experiments are the quickest way to create a faster and smaller version of your model. -The algorithms are applied to the model post training utilizing a calibration dataset, so they result in no further training time and much faster sparsification times compared with Training-Aware Experiments. +The algorithms are applied to the model post-training utilizing a calibration dataset, so they result in no further training time and much faster sparsification times compared with Training-Aware Experiments. Generally, One-Shot Experiments result in a 3-5x speedup with minimal accuracy loss. They are ideal for when you want to quickly sparsify your model and don't have a lot of time to spend on the sparsification process. @@ -251,17 +251,56 @@ NLP Example: sparsify.run training-aware --use-case text_classification --model bert-base --data sst2 --optim-level 0.5 ``` -### Compare the Results +### Compare the Experiment results -Once you have run your Experiment, you can compare the results printed out to the console. +Once you have run your Experiment, you can compare the results printed out to the console using the `deepsparse.benchmark` command. In the near future, you will be able to compare the results in the Cloud, measure other scenarios, and compare the results to other Experiments. 
+ +To compare the results of your Experiment with the original dense baseline model, you can use the `deepsparse.benchmark` command with your original model and the new optimized model on your deployment hardware. Models that have been optimized using Sparsify will generally run performantly on DeepSparse, Neural Magic's sparsity-aware CPU inference runtime. + + +For more information on benchmarking, see the [DeepSparse Benchmarking User Guide](https://github.com/neuralmagic/deepsparse/blob/main/docs/user-guide/deepsparse-benchmarking.md). + +Here is an example of a `deepsparse.benchmark`command: + +``` +deepsparse.benchmark zoo:nlp/sentiment_analysis/obert-base/pytorch/huggingface/sst2/pruned90_quant-none --scenario sync + +``` + The results will look something like this: ```bash -Sparsify Results: -TODO +2023-06-30 15:20:41 deepsparse.benchmark.benchmark_model INFO Thread pinning to cores enabled +downloading...: 100%|████████████████████████| 105M/105M [00:18<00:00, 5.81MB/s] +DeepSparse, Copyright 2021-present / Neuralmagic, Inc. version: 1.6.0.20230629 COMMUNITY | (fc8b788a) (release) (optimized) (system=avx512, binary=avx512) +[7ffba5a84700 >WARN< operator() ./src/include/wand/utility/warnings.hpp:14] Generating emulated code for quantized (INT8) operations since no VNNI instructions were detected. Set NM_FAST_VNNI_EMULATION=1 to increase performance at the expense of accuracy. 
+2023-06-30 15:21:13 deepsparse.benchmark.benchmark_model INFO deepsparse.engine.Engine: + onnx_file_path: /home/rahul/.cache/sparsezoo/neuralmagic/obert-base-sst2_wikipedia_bookcorpus-pruned90_quantized/model.onnx + batch_size: 1 + num_cores: 10 + num_streams: 1 + scheduler: Scheduler.default + fraction_of_supported_ops: 0.9981 + cpu_avx_type: avx512 + cpu_vnni: False +2023-06-30 15:21:13 deepsparse.utils.onnx INFO Generating input 'input_ids', type = int64, shape = [1, 128] +2023-06-30 15:21:13 deepsparse.utils.onnx INFO Generating input 'attention_mask', type = int64, shape = [1, 128] +2023-06-30 15:21:13 deepsparse.utils.onnx INFO Generating input 'token_type_ids', type = int64, shape = [1, 128] +2023-06-30 15:21:13 deepsparse.benchmark.benchmark_model INFO Starting 'singlestream' performance measurements for 10 seconds +Original Model Path: zoo:nlp/sentiment_analysis/obert-base/pytorch/huggingface/sst2/pruned90_quant-none +Batch Size: 1 +Scenario: sync +Throughput (items/sec): 134.5611 +Latency Mean (ms/batch): 7.4217 +Latency Median (ms/batch): 7.4245 +Latency Std (ms/batch): 0.0264 +Iterations: 1346 ``` +*Note: performance improvement is not guaranteed across all runtimes and hardware types.* + + ### Package for Deployment Landing soon! diff --git a/docs/cli-api-guide.md b/docs/cli-api-guide.md index fa9bb319..5ba36c87 100644 --- a/docs/cli-api-guide.md +++ b/docs/cli-api-guide.md @@ -14,5 +14,186 @@ See the License for the specific language governing permissions and limitations under the License. --> + # Sparsify CLI/API Guide + +The Sparsify CLI/API is a Python package that allows you to run Sparsify Experiments locally, sync with the Sparsify Cloud, and integrate into your own workflows. + +## Install Sparsify + +Next, you'll need to install Sparsify on your training hardware. 
+To do this, run the following command: + +```bash +pip install sparsify +``` + +For more details and system/hardware requirements, see the [Installation](https://github.com/neuralmagic/sparsify#installation) section. + +## Login to Sparsify + +With Sparsify installed on your training hardware, you'll need to authorize the local CLI to access your account. +This is done by running the `sparsify.login` command and providing your API key. +Locate your API key on the home page of the [Sparsify Cloud](https://apps.neuralmagic.com/sparsify) under the **'Get set up'** modal. +Once you have located this, copy the command or the API key itself and run the following command: + +```bash +sparsify.login API_KEY +```` + +The `sparsify.login API_KEY` command is used to sync your local training environment with the Sparsify Cloud in order to keep track of your Experiments. Once you run the `sparsify.login API_KEY` command, you should see a confirmation via the console that you are logged into Sparsify. To log out of Sparsify, use the `exit` command. + +If you encounter any issues with your API key, reach out to the team via the [nm-sparsify Slack Channel](https://join.slack.com/t/discuss-neuralmagic/shared_invite/zt-1xkdlzwv9-2rvS6yQcCs7VDNUcWxctnw), [email](mailto:rob@neuralmagic.com) or via [GitHub Issues](https://github.com/neuralmagic/sparsify/issues). + + +## Run an Experiment + +Experiments are the core of sparsifying a model. +They are the process of applying sparsification algorithms in One-Shot, Training-Aware, or Sparse-Transfer to a dataset and model. + +All Experiments are run locally on your training hardware and can be synced with the cloud for further analysis and comparison. + +To run an Experiment, you can use either the CLI or the API depending on your use case. +The Sparsify Cloud provides a UI for exploring hyperparameters, predicting performance, and generating the desired CLI/API command. 
+ +The general command for running an Experiment is: + +```bash +sparsify.run EXPERIMENT_TYPE --use-case USE_CASE --model MODEL --data DATA --optim-level OPTIM_LEVEL +``` + +Where the values for each of the arguments follow these general rules: +- EXPERIMENT_TYPE: one of `one-shot`, `training-aware`, or `sparse-transfer`. + +- USE_CASE: the use case you're solving for such as `image-classification`, `object-detection`, `text-classification`, a custom use case, etc. A full list of supported use cases for each Experiment type can be found [here](https://github.com/neuralmagic/sparsify/blob/main/docs/use-cases-guide.md). + +- MODEL: the model you want to sparsify which can be a model name such as `resnet50`, a stub from the [SparseZoo](https://sparsezoo.neuralmagic.com), or a path to a local model. For One-Shot, currently the model must be in an ONNX format. For Training-Aware and Sparse-Transfer, the model must be in a PyTorch format. More details on model formats can be found [here](https://github.com/neuralmagic/sparsify/blob/main/docs/models-guide.md). + +- DATA: the dataset you want to use to sparsify the model. This can be a dataset name such as `imagenette` or a path to a local dataset. Currently, One-Shot only supports NPZ formatted datasets. Training-Aware and Sparse-Transfer support PyTorch ImageFolder datasets for image classification, YOLOv5/v8 datasets for object detection and segmentation, and HuggingFace datasets for NLP/NLG. More details on dataset formats can be found [here](https://github.com/neuralmagic/sparsify/blob/main/docs/datasets-guide.md). + +- OPTIM_LEVEL: the desired sparsification level from 0 (none) to 1 (max). The general rule is that 0 is the baseline model, <0.3 only quantizes the model, 0.3-1.0 increases the sparsity of the model and applies quantization. More details on sparsification levels can be found [here](https://github.com/neuralmagic/sparsify/blob/main/docs/optim-levels-guide.md). 
+ + +### Experiment Type Examples +#### Running One-Shot + +| Sparsity | Sparsification Speed | Accuracy | +|----------|----------------------|----------| +| **++** | **+++++** | **+++** | + +One-Shot Experiments are the quickest way to create a faster and smaller version of your model. +The algorithms are applied to the model post training utilizing a calibration dataset, so they result in no further training time and much faster sparsification times compared with Training-Aware Experiments. + +Generally, One-Shot Experiments result in a 3-5x speedup with minimal accuracy loss. +They are ideal for when you want to quickly sparsify your model and don't have a lot of time to spend on the sparsification process. + +CV Example: +```bash +sparsify.run one-shot --use-case image_classification --model resnet50 --data imagenette --optim-level 0.5 +``` + +NLP Example: +```bash +sparsify.run one-shot --use-case text_classification --model bert-base --data sst2 --optim-level 0.5 +``` + +#### Running Sparse-Transfer + +| Sparsity | Sparsification Speed | Accuracy | +|----------|----------------------|-----------| +| **++++** | **++++** | **+++++** | + +Sparse-Transfer Experiments are the second quickest way to create a faster and smaller model for your dataset. +Sparse, foundational models are sparsified in a Training-Aware manner on a large dataset such as ImageNet. +Then, the sparse patterns are transferred to your dataset through a fine-tuning process. + +Generally, Sparse-Transfer Experiments result in a 5-10x speedup with minimal accuracy loss. +They are ideal when a sparse model already exists for your use case, and you want to quickly utilize it for your dataset. +Note, the model argument is optional for Sparse-Transfer Experiments as Sparsify will select the best one from the SparseZoo for your use case if not supplied. 
+ +CV Example: +```bash +sparsify.run sparse-transfer --use-case image_classification --data imagenette --optim-level 0.5 +``` + +NLP Example: +```bash +sparsify.run sparse-transfer --use-case text_classification --data sst2 --optim-level 0.5 +``` + +#### Running Training-Aware + +| Sparsity | Sparsification Speed | Accuracy | +|-----------|-----------------------|-----------| +| **+++++** | **++** | **+++++** | + +Training-Aware Experiments are the most accurate way to create a faster and smaller model for your dataset. +The algorithms are applied to the model during training, so they offer the best possible recovery of accuracy. +However, they do require additional training time and hyperparameter tuning to achieve the best results. + +Generally, Training-Aware Experiments result in a 6-12x speedup with minimal accuracy loss. +They are ideal when you have the time to train a model, have a custom model, or want to achieve the best possible accuracy. +Note, the model argument is optional for Sparse-Transfer Experiments as Sparsify will select the best one from the SparseZoo for your use case if not supplied. + +CV Example: +```bash +sparsify.run training-aware --use-case image_classification --model resnet50 --data imagenette --optim-level 0.5 +``` + +NLP Example: +```bash +sparsify.run training-aware --use-case text_classification --model bert-base --data sst2 --optim-level 0.5 +``` + +## Advanced CLI/API Usage Landing Soon! + + +## Compare the Experiment results + +Once you have run your Experiment, you can compare the results printed out to the console using the `deepsparse.benchmark` command. +In the near future, you will be able to compare the results in the Cloud, measure other scenarios, and compare the results to other Experiments. + + +To compare the results of your Experiment with the original dense baseline model, you can use the `deepsparse.benchmark` command with your original model and the new optimized model on your deployment hardware. 
Models that have been optimized using Sparsify will generally run performantly on DeepSparse, Neural Magic's sparsity-aware CPU inference runtime. + + +For more information on benchmarking, see the [DeepSparse Benchmarking User Guide](https://github.com/neuralmagic/deepsparse/blob/main/docs/user-guide/deepsparse-benchmarking.md). + +Here is an example of a `deepsparse.benchmark`command: + +``` +deepsparse.benchmark zoo:nlp/sentiment_analysis/obert-base/pytorch/huggingface/sst2/pruned90_quant-none --scenario sync + +``` + +The results will look something like this: +```bash +2023-06-30 15:20:41 deepsparse.benchmark.benchmark_model INFO Thread pinning to cores enabled +downloading...: 100%|████████████████████████| 105M/105M [00:18<00:00, 5.81MB/s] +DeepSparse, Copyright 2021-present / Neuralmagic, Inc. version: 1.6.0.20230629 COMMUNITY | (fc8b788a) (release) (optimized) (system=avx512, binary=avx512) +[7ffba5a84700 >WARN< operator() ./src/include/wand/utility/warnings.hpp:14] Generating emulated code for quantized (INT8) operations since no VNNI instructions were detected. Set NM_FAST_VNNI_EMULATION=1 to increase performance at the expense of accuracy. 
+2023-06-30 15:21:13 deepsparse.benchmark.benchmark_model INFO deepsparse.engine.Engine: + onnx_file_path: /home/rahul/.cache/sparsezoo/neuralmagic/obert-base-sst2_wikipedia_bookcorpus-pruned90_quantized/model.onnx + batch_size: 1 + num_cores: 10 + num_streams: 1 + scheduler: Scheduler.default + fraction_of_supported_ops: 0.9981 + cpu_avx_type: avx512 + cpu_vnni: False +2023-06-30 15:21:13 deepsparse.utils.onnx INFO Generating input 'input_ids', type = int64, shape = [1, 128] +2023-06-30 15:21:13 deepsparse.utils.onnx INFO Generating input 'attention_mask', type = int64, shape = [1, 128] +2023-06-30 15:21:13 deepsparse.utils.onnx INFO Generating input 'token_type_ids', type = int64, shape = [1, 128] +2023-06-30 15:21:13 deepsparse.benchmark.benchmark_model INFO Starting 'singlestream' performance measurements for 10 seconds +Original Model Path: zoo:nlp/sentiment_analysis/obert-base/pytorch/huggingface/sst2/pruned90_quant-none +Batch Size: 1 +Scenario: sync +Throughput (items/sec): 134.5611 +Latency Mean (ms/batch): 7.4217 +Latency Median (ms/batch): 7.4245 +Latency Std (ms/batch): 0.0264 +Iterations: 1346 +``` + +*Note: performance improvement is not guaranteed across all runtimes and hardware types.* diff --git a/docs/cloud-user-guide.md b/docs/cloud-user-guide.md index b5b16501..8e225dd2 100644 --- a/docs/cloud-user-guide.md +++ b/docs/cloud-user-guide.md @@ -45,7 +45,7 @@ To do this, run the following command: pip install sparsify ``` -For more details and system/hardware requirements, see the [Installation](https://github.com/neuralmagic/sparsify/README.md#installation) section. +For more details and system/hardware requirements, see the [Installation](https://github.com/neuralmagic/sparsify/blob/main/README.md#installation) section. You may copy the command from the Sparsify Cloud in step 1 and run that in your training environment to install Sparsify. 
@@ -64,7 +64,7 @@ Once you have located this, copy the command or the API key itself and run the f sparsify.login API_KEY ```` -You may copy the command from the Sparsify Cloud in step 2 and run that in your training environment after installing Sparsify to log in via the Sparsify CLI. For more details on the `sparsify.login` command, see the [CLI/API Guide](https://github.com/neuralmagic/sparsify/docs/cli-api-guide.md). +You may copy the command from the Sparsify Cloud in step 2 and run that in your training environment after installing Sparsify to log in via the Sparsify CLI. For more details on the `sparsify.login` command, see the [CLI/API Guide](https://github.com/neuralmagic/sparsify/blob/main/docs/cli-api-guide.md). ## Run an Experiment Experiments are the core of sparsifying a model. @@ -79,7 +79,7 @@ To run an Experiment, use the Sparsify Cloud to generate a code command to run i ![Sparsify a model](https://drive.google.com/uc?id=1FyayVSqq5YtKO_dEgt5iMNSZQNsqaQFq) 3. Select a Use Case for your model. Note that if your use case is not present in the dropdown, fear not; the use case does not affect the optimization of the model. -4. Choose the Experiment Type. To learn more about the Experiments, see the [Sparsify README](https://github.com/neuralmagic/sparsify/README.md#run-an-experiment). +4. Choose the Experiment Type. To learn more about the Experiments, see the [Sparsify README](https://github.com/neuralmagic/sparsify/blob/main/README.md#run-an-experiment). 5. Adjust the Hyperparameter Compression slider to designate whether you would like to to optimize the model for performance, accuracy, or a balance of both. Note that selecting an extreme on the slider will not completely tank the opposing metric. 6. Click 'Generate Code Snippet' to view the code snipppet generated from your sparsification selections on the next modal. 
![Generate Code Snippetl](https://drive.google.com/uc?id=14B193hHeYqLeSX8r6C5N1G8beBeXUkYE) @@ -90,12 +90,15 @@ To run an Experiment, use the Sparsify Cloud to generate a code command to run i ![Generate Code Snippetl](https://drive.google.com/uc?id=1xWrla3ps0qeS70P1bzOIYGeIPXWgHfF_) -To learn more about the arguments for the `sparsify.run` command, see the [CLI/API Guide](https://github.com/neuralmagic/sparsify/docs/cli-api-guide.md). - +To learn more about the arguments for the `sparsify.run` command, see the [CLI/API Guide](https://github.com/neuralmagic/sparsify/blob/main/docs/cli-api-guide.md). ## Compare the Experiment results +Once you have run your Experiment, you can compare the results printed out to the console using the `deepsparse.benchmark` command. +In the near future, you will be able to compare the results in the Cloud, measure other scenarios, and compare the results to other Experiments. + + To compare the results of your Experiment with the original dense baseline model, you can use the `deepsparse.benchmark` command with your original model and the new optimized model on your deployment hardware. Models that have been optimized using Sparsify will generally run performantly on DeepSparse, Neural Magic's sparsity-aware CPU inference runtime. @@ -108,5 +111,33 @@ deepsparse.benchmark zoo:nlp/sentiment_analysis/obert-base/pytorch/huggingface/s ``` -*Note: performance improvement is not guaranteed across all runtimes and hardware types.* +The results will look something like this: +```bash +2023-06-30 15:20:41 deepsparse.benchmark.benchmark_model INFO Thread pinning to cores enabled +downloading...: 100%|████████████████████████| 105M/105M [00:18<00:00, 5.81MB/s] +DeepSparse, Copyright 2021-present / Neuralmagic, Inc. 
version: 1.6.0.20230629 COMMUNITY | (fc8b788a) (release) (optimized) (system=avx512, binary=avx512) +[7ffba5a84700 >WARN< operator() ./src/include/wand/utility/warnings.hpp:14] Generating emulated code for quantized (INT8) operations since no VNNI instructions were detected. Set NM_FAST_VNNI_EMULATION=1 to increase performance at the expense of accuracy. +2023-06-30 15:21:13 deepsparse.benchmark.benchmark_model INFO deepsparse.engine.Engine: + onnx_file_path: /home/rahul/.cache/sparsezoo/neuralmagic/obert-base-sst2_wikipedia_bookcorpus-pruned90_quantized/model.onnx + batch_size: 1 + num_cores: 10 + num_streams: 1 + scheduler: Scheduler.default + fraction_of_supported_ops: 0.9981 + cpu_avx_type: avx512 + cpu_vnni: False +2023-06-30 15:21:13 deepsparse.utils.onnx INFO Generating input 'input_ids', type = int64, shape = [1, 128] +2023-06-30 15:21:13 deepsparse.utils.onnx INFO Generating input 'attention_mask', type = int64, shape = [1, 128] +2023-06-30 15:21:13 deepsparse.utils.onnx INFO Generating input 'token_type_ids', type = int64, shape = [1, 128] +2023-06-30 15:21:13 deepsparse.benchmark.benchmark_model INFO Starting 'singlestream' performance measurements for 10 seconds +Original Model Path: zoo:nlp/sentiment_analysis/obert-base/pytorch/huggingface/sst2/pruned90_quant-none +Batch Size: 1 +Scenario: sync +Throughput (items/sec): 134.5611 +Latency Mean (ms/batch): 7.4217 +Latency Median (ms/batch): 7.4245 +Latency Std (ms/batch): 0.0264 +Iterations: 1346 +``` +*Note: performance improvement is not guaranteed across all runtimes and hardware types.* From 047933df5780c2b6ed9d3cfd70fff322af7ab84f Mon Sep 17 00:00:00 2001 From: Michael Goin Date: Mon, 3 Jul 2023 10:17:05 -0400 Subject: [PATCH 02/47] Fix README.md links (#242) --- README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index cf7cd8a8..c60c8a4a 100644 --- a/README.md +++ b/README.md @@ -111,8 +111,8 @@ pip install sparsify Sparsify is tested on Python 3.8 
and 3.10, ONNX 1.5.0-1.12.0, ONNX opset version 11+, and manylinux compliant systems. Sparsify requires a GPU with CUDA + CuDNN in order to sparsify neural networks. -We recommend you use a Linux system with a GPU that has a minimum of 16 GB of GPU Memory, 128 GB of RAM, 4 cores, and is CUDA-enabled. If you are sparsifying a very large model, you may need more RAM than the recommended 128 GB. -If you encounter issues setting up your training environment, file a GitHub issue [here]( https://github.com/neuralmagic/sparsify/issues). +We recommend you use a Linux system with a GPU that has a minimum of 16GB of GPU Memory, 128GB of RAM, 4 CPU cores, and is CUDA-enabled. If you are sparsifying a very large model, you may need more RAM than the recommended 128GB. +If you encounter issues setting up your training environment, file a GitHub issue [here](https://github.com/neuralmagic/sparsify/issues). ## Quick Start From 2b25764615d98ca23832a385295abf9408157fb0 Mon Sep 17 00:00:00 2001 From: Michael Goin Date: Mon, 3 Jul 2023 11:24:59 -0400 Subject: [PATCH 03/47] Update run.py one-shot to support SparseZoo stubs (#244) * Update run.py one-shot to support SparseZoo stubs * Style --- src/sparsify/cli/run.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/src/sparsify/cli/run.py b/src/sparsify/cli/run.py index fc19c5b7..68e8fe29 100644 --- a/src/sparsify/cli/run.py +++ b/src/sparsify/cli/run.py @@ -17,6 +17,7 @@ from pathlib import Path import click +from sparsezoo import Model from sparsify.cli import opts @@ -49,7 +50,7 @@ def one_shot(**kwargs): recipe_args = json.loads(recipe_args) one_shot.one_shot( - model=Path(kwargs["model"]), + model=Path(_maybe_unwrap_zoo_stub(kwargs["model"])), dataset_dir=Path(kwargs["data"]), num_samples=kwargs["train_samples"] or None, deploy_dir=Path(kwargs["working_dir"]), @@ -123,5 +124,11 @@ def _parse_run_args_to_auto(sparse_transfer: bool, **kwargs): ) +def _maybe_unwrap_zoo_stub(model_path: str) -> str: + if 
model_path.startswith("zoo:"): + return Model(model_path).onnx_model.path + return model_path + + if __name__ == "__main__": main() From 29d10792eb2c70a526559e933bf25d9a72340d34 Mon Sep 17 00:00:00 2001 From: Benjamin Fineran Date: Mon, 3 Jul 2023 16:46:45 -0400 Subject: [PATCH 04/47] fix optim level in readme (#246) --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index c60c8a4a..cd557988 100644 --- a/README.md +++ b/README.md @@ -85,7 +85,7 @@ To get started immediately, [create an account](https://account.neuralmagic.com/ With all of that setup, sparsifying your models is as easy as: ```bash -sparsify.run sparse-transfer --use-case image-classification --data imagenette --optim-level 50 --train-kwargs '{"dataset": "imagenette"}' +sparsify.run sparse-transfer --use-case image-classification --data imagenette --optim-level 0.5 --train-kwargs '{"dataset": "imagenette"}' ```
From 3080de710fa67e77811b0b50ac0653d6313e6036 Mon Sep 17 00:00:00 2001 From: Dipika Sikka Date: Thu, 6 Jul 2023 09:30:20 -0400 Subject: [PATCH 05/47] update DDP_ENABLED flag to be False if cuda available (#249) --- src/sparsify/auto/tasks/runner.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/sparsify/auto/tasks/runner.py b/src/sparsify/auto/tasks/runner.py index ee48b264..ffbf3363 100644 --- a/src/sparsify/auto/tasks/runner.py +++ b/src/sparsify/auto/tasks/runner.py @@ -41,7 +41,9 @@ "TaskRunner", ] -DDP_ENABLED = not (os.environ.get("NM_AUTO_DISABLE_DDP", False)) +DDP_ENABLED = ( + not (os.environ.get("NM_AUTO_DISABLE_DDP", False)) and torch.cuda.is_available() +) MAX_RETRY_ATTEMPTS = os.environ.get("NM_MAX_SCRIPT_RETRY_ATTEMPTS", 3) # default: 3 MAX_MEMORY_STEPDOWNS = os.environ.get("NM_MAX_SCRIPT_MEMORY_STEPDOWNS", 10) SUPPORTED_TASKS = [ From 9e78466326a413ae0afb2a7d8f766d22f9c3efcf Mon Sep 17 00:00:00 2001 From: Rahul Tuli Date: Thu, 6 Jul 2023 15:51:13 -0400 Subject: [PATCH 06/47] Add preliminary sparsify Dockerfile (#234) * Dockerfile with Workflow files * Upgrade pip * Add empty last line --- .github/workflows/build-docker-image.yaml | 72 +++++++++++++++++++ .../workflows/build-nightly-docker-image.yaml | 58 +++++++++++++++ docker/Dockerfile | 30 +++----- 3 files changed, 140 insertions(+), 20 deletions(-) create mode 100644 .github/workflows/build-docker-image.yaml create mode 100644 .github/workflows/build-nightly-docker-image.yaml diff --git a/.github/workflows/build-docker-image.yaml b/.github/workflows/build-docker-image.yaml new file mode 100644 index 00000000..baf3992c --- /dev/null +++ b/.github/workflows/build-docker-image.yaml @@ -0,0 +1,72 @@ +name: Build and Publish Sparsify Release Docker Images + +on: + release: + types: [published] + +jobs: + build-and-push-docker-image: + name: Build and Push Version Tagged Docker Images to GitHub Container Registry + runs-on: ubuntu-latest + permissions: + contents: read + 
packages: write + + steps: + - name: Cleanup disk + run: | + sudo ls -l /usr/local/lib/ + sudo ls -l /usr/share/ + sudo du -sh /usr/local/lib/ + sudo du -sh /usr/share/ + sudo rm -rf /usr/local/lib/android + sudo rm -rf /usr/share/dotnet + sudo du -sh /usr/local/lib/ + sudo du -sh /usr/share/ + + - name: Set up Docker Buildx + if: ${{ startsWith(github.ref, 'refs/tags/v') }} + id: buildx + uses: docker/setup-buildx-action@v2 + with: + buildkitd-flags: --debug + + - name: Login to Github Packages + if: ${{ startsWith(github.ref, 'refs/tags/v') }} + uses: docker/login-action@v2 + with: + registry: ghcr.io + username: ${{ github.actor }} + password: ${{ secrets.GITHUB_TOKEN }} + + - name: Checkout code + if: ${{ startsWith(github.ref, 'refs/tags/v') }} + uses: actions/checkout@v3 + with: + fetch-depth: 1 + + - name: Get Tag + id: extract_tag + run: echo "##[set-output name=tag;]$(echo ${GITHUB_REF_NAME#*/})" + + - name: Current Version Name + if: ${{ startsWith(github.ref, 'refs/tags/v') }} + run: | + echo ${{ steps.extract_tag.outputs.tag }} + + - name: Build and push sparsify release ${{ steps.extract_tag.outputs.tag }} docker image + if: ${{ startsWith(github.ref, 'refs/tags/v') }} + uses: docker/build-push-action@v2 + with: + context: ./docker + build-args: | + REF=release/${{ steps.extract_tag.outputs.tag }} + push: true + tags: | + ghcr.io/neuralmagic/sparsify:${{ steps.extract_tag.outputs.tag }} + + + + - name: Image digest + if: ${{ startsWith(github.ref, 'refs/tags/v') }} + run: echo ${{ steps.docker_build.outputs.digest }} diff --git a/.github/workflows/build-nightly-docker-image.yaml b/.github/workflows/build-nightly-docker-image.yaml new file mode 100644 index 00000000..aa28d3f0 --- /dev/null +++ b/.github/workflows/build-nightly-docker-image.yaml @@ -0,0 +1,58 @@ +name: Build and Publish Sparsify Release Docker Images + +on: + push: + branches: + - 'main' + +jobs: + build-and-push-docker-image: + name: Build and Push Version Tagged Docker Images to GitHub 
Container Registry + runs-on: ubuntu-latest + permissions: + contents: read + packages: write + + steps: + - name: Cleanup disk + run: | + sudo ls -l /usr/local/lib/ + sudo ls -l /usr/share/ + sudo du -sh /usr/local/lib/ + sudo du -sh /usr/share/ + sudo rm -rf /usr/local/lib/android + sudo rm -rf /usr/share/dotnet + sudo du -sh /usr/local/lib/ + sudo du -sh /usr/share/ + + - name: Set up Docker Buildx + id: buildx + uses: docker/setup-buildx-action@v2 + with: + buildkitd-flags: --debug + + - name: Checkout code + uses: actions/checkout@v3 + with: + fetch-depth: 1 + + - name: Login to Github Packages + uses: docker/login-action@v2 + with: + registry: ghcr.io + username: ${{ github.actor }} + password: ${{ secrets.GITHUB_TOKEN }} + + + - name: Build and push sparsify-nightly docker image + uses: docker/build-push-action@v2 + with: + context: ./docker + build-args: | + REF=main + push: true + tags: | + ghcr.io/neuralmagic/sparsify-nightly:latest + + - name: Image digest + run: echo ${{ steps.docker_build.outputs.digest }} diff --git a/docker/Dockerfile b/docker/Dockerfile index b68fef09..6e0ed958 100644 --- a/docker/Dockerfile +++ b/docker/Dockerfile @@ -1,25 +1,15 @@ -# Setup the base image -FROM python:3.8-slim-bullseye +FROM nvidia/cuda:11.8.0-cudnn8-devel-ubuntu20.04 -# Install git -RUN : \ - && apt-get update \ - && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends git \ - && apt-get clean \ - && rm -rf /var/lib/apt/lists/* +ARG DEBIAN_FRONTEND=noninteractive -# Activate venv -RUN python3.8 -m venv /venv -ENV PATH="venv/bin:$PATH" +RUN apt-get update && apt-get install --no-install-recommends -y \ + git python3 python3-dev python3-venv python3-pip python3-wheel build-essential && \ + apt-get clean && rm -rf /var/lib/apt/lists/* -RUN pip3 install --upgrade setuptools wheel -# Setup DeepSparse +ARG REF=main +RUN git clone https://github.com/neuralmagic/sparsify && cd sparsify && git checkout $REF +RUN python3 -m pip install --upgrade pip && 
\ + python3 -m pip install --no-cache-dir -e ./sparsify -ARG GIT_CHECKOUT -# if $GIT_CHECKOUT is not specified - just install from pypi -RUN if [ -z "${GIT_CHECKOUT}" ] ; then pip3 install --no-cache-dir --upgrade deepsparse[server] ; fi - -# if $GIT_CHECKOUT is specified - clone, checkout $GIT_CHECKOUT, and install with -e -RUN if [ -n "${GIT_CHECKOUT}" ] ; then git clone https://github.com/neuralmagic/deepsparse.git --depth 1 -b $GIT_CHECKOUT; fi -RUN if [ -n "${GIT_CHECKOUT}" ] ; then pip3 install --no-cache-dir --upgrade -e "./deepsparse[server]" ; fi +CMD ["/bin/bash"] From fb685ec73b12871835d6974b8bf0c02703130126 Mon Sep 17 00:00:00 2001 From: Rahul Tuli Date: Fri, 7 Jul 2023 10:24:56 -0400 Subject: [PATCH 07/47] Add _LOGGER, set to INFO level (#250) Add logs when moving deployment directory --- src/sparsify/auto/tasks/runner.py | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/src/sparsify/auto/tasks/runner.py b/src/sparsify/auto/tasks/runner.py index ffbf3363..47d369cb 100644 --- a/src/sparsify/auto/tasks/runner.py +++ b/src/sparsify/auto/tasks/runner.py @@ -15,6 +15,7 @@ import gc import json +import logging import os import shutil import socket @@ -57,6 +58,8 @@ ] ] _TASK_RUNNER_IMPLS = {} +_LOGGER = logging.getLogger(__name__) +_LOGGER.setLevel(logging.INFO) # set at top level to modify later def retry_stage(stage: str): @@ -394,20 +397,24 @@ def create_deployment_directory(self, train_directory: str, deploy_directory: st """ Creates and/or moves deployment directory to the deployment directory for the mode corresponding to the trial_idx - + :post-condition: The deployment artifacts will be moved from + origin_directory to deploy_directory :param train_directory: directory to grab the exported files from :param deploy_directory: directory to save the deployment files to """ origin_directory = self._get_default_deployment_directory(train_directory) - + _LOGGER.info("Moving %s to %s" % (origin_directory, deploy_directory)) for 
filename in os.listdir(origin_directory): source_file = os.path.join(origin_directory, filename) target_file = os.path.join(deploy_directory, filename) shutil.move(source_file, target_file) + + _LOGGER.info("Deleting %s" % origin_directory) shutil.rmtree(origin_directory) with open(os.path.join(deploy_directory, "readme.txt"), "x") as f: f.write("deployment instructions will go here") + _LOGGER.info("Deployment directory moved to %s" % deploy_directory) @abstractmethod def _train_completion_check(self) -> bool: From cd5a93873bf3721a1585b8fd0823bcb2836b65a1 Mon Sep 17 00:00:00 2001 From: Konstantin Gulin <66528950+KSGulin@users.noreply.github.com> Date: Fri, 7 Jul 2023 10:00:08 -0700 Subject: [PATCH 08/47] Add environment checks (#233) * Clear existing sparsify source * Add back version file * Port of sparsify.auto from private repository (#124) * remove javascript deps * Initial port of autosparse to sparsify.auto * Initial port autosparse -> sparsify.auto * Added tests and fixes * Add back yarn * Add github workflow for test checks * Update workflows Co-authored-by: Benjamin Fineran workflow * Add GHA tests for base, package, and auto (#133) * `sparsify.package` base CLI (#125) * bump up main to 1.2.0 (#128) Co-authored-by: dhuang * Adds the following: * Setup directory Structure * `from sparsify import package` importable + callable function * A constants file with supported tasks, criterions, and deployment scenarios (Should probably converted to `Enums` or something better than `python lists`) * Add `click` as a required dependency * Additional CLI helpers for updated acceptance criterion * `sparsify.package` cli utility * setup test directory * Add tests for CLI * Setup Entrypoints * Remove old docstring * - Moved utils outside `package` - Renamed package_ to package - Add more tests - Update Usage command - Rebased on `sparsify.alpha` - Add typing - Add version info to cli Apply review comments from @corey-nm - Remove `cli_helpers.py` and rely on `click` * 
Remove unintended change added while resolving merge conflicts * Style * Add dataset registry update cli to use dataset registry * Fix failing tests * Centralize task registry (#132) * Centralize task name and alias handeling * Propagate TaskName updates to auto tasks * Fix click parse args call * Fix failing tests after TASK name updates * Prevent auto install of integrations on sparsify import (#134) * * Change `NO_VNNI` --> `DEFAULT` * Refactor CLI arg parsing cause originally `System.exit()` was thrown on invoking help * Rename `scenario` --> `target` * Remove single character shortcuts, as suggested by @bfineran * Default directory to `None` for now, logic to choose an appropriate name will be added to diff #130 * Added show defaults at the top level `click.command()` decorator * Added a `DEFAULT_OPTIMIZNG_METRIC` * Added a `DEFAULT_DEPLOYMENT_SCENARIO` * Changed `optimizing_metric` help message * Updated Tests * - Style - Example Usage Co-authored-by: dhuangnm <74931910+dhuangnm@users.noreply.github.com> Co-authored-by: dhuang Co-authored-by: Konstantin Gulin <66528950+KSGulin@users.noreply.github.com> * Add DDP support (#126) * `sparsify.package` backend-call (#130) * bump up main to 1.2.0 (#128) Co-authored-by: dhuang * Adds the following: * Setup directory Structure * `from sparsify import package` importable + callable function * A constants file with supported tasks, criterions, and deployment scenarios (Should probably converted to `Enums` or something better than `python lists`) * Add `click` as a required dependency * Additional CLI helpers for updated acceptance criterion * `sparsify.package` cli utility * setup test directory * Add tests for CLI * Setup Entrypoints * Remove old docstring * - Moved utils outside `package` - Renamed package_ to package - Add more tests - Update Usage command - Rebased on `sparsify.alpha` - Add typing - Add version info to cli Apply review comments from @corey-nm - Remove `cli_helpers.py` and rely on `click` * Remove 
unintended change added while resolving merge conflicts * Style * Add dataset registry update cli to use dataset registry * Fix failing tests * Centralize task registry (#132) * Centralize task name and alias handeling * Propagate TaskName updates to auto tasks * Fix click parse args call * Fix failing tests after TASK name updates * Prevent auto install of integrations on sparsify import (#134) * * Change `NO_VNNI` --> `DEFAULT` * Refactor CLI arg parsing cause originally `System.exit()` was thrown on invoking help * Rename `scenario` --> `target` * Remove single character shortcuts, as suggested by @bfineran * Default directory to `None` for now, logic to choose an appropriate name will be added to diff #130 * Added show defaults at the top level `click.command()` decorator * Added a `DEFAULT_OPTIMIZNG_METRIC` * Added a `DEFAULT_DEPLOYMENT_SCENARIO` * Changed `optimizing_metric` help message * Updated Tests * - Style - Example Usage * Add proper commands + gha workflows * Refactor package function to make a call to the backend service * Add template function for output Add importable Backend Base url Remove unnecessary args from package function Add end to end integration test * Updated tests, addressed comments * Base Cli + importable function * Style * Remove files added in faulty rebase * Changed base url, styling Co-authored-by: dhuangnm <74931910+dhuangnm@users.noreply.github.com> Co-authored-by: dhuang Co-authored-by: Konstantin Gulin <66528950+KSGulin@users.noreply.github.com> Co-authored-by: Konstantin * `sparsify.package` updates (#141) * Update output to also print model metrics Update `--optimizing_metrics` to take in a string containing comma separated metrics for example `--optimizing_metric "compression, accuracy"`(added a `_csv_callback` function for that) Update Usage instructions accordingly Add a log statement to package function Added more tests * Address comments * Rename `normalized_metric` --> `metric_` to avoid potential confusion * Add a 
getter for TASK_REGISTRY and DATASET_REGISTRY (#142) * Add a getter for TASK_REGISTRY and DATASET_REGISTRY * typing * fix potential bug * Add None to test * Updated tests according to comments from @bfineran * Make test cleaner based on feedback from @corey-nm * Remove config creator (#136) * [Auto] Add Tensorboard Support (#147) * Support for Hyperparameter Tuning (#145) * force convert yolov5 metric keys to float (#151) * [Auto] Update function name and description to be more generic (#149) * rename and flip logic for stopping_condition flag (#152) * [Auto] Support for multi-stage tuning (#157) * Support for updated tuning flow (#159) * Support tuning of CLI args (#158) * Support multiple optimizing metrics (#160) * Log important updates with an easily visible format (#161) * Update the user output for `sparsify.package` (#166) * Add Dockerfile Download deployment directory, and Update instructions for user Update tests * Add volume mount to docker command * [Auto] Update interface for sparsifyml (#173) * Fix: remove debug line * Update sparsify.auto interface for sparsifyml * rename interface -> schemas * Sparsify.alpha.auto (#179) * Update: sparsify.version to match with main * Delete: sparsify.package * Empty commit * Add: stitch functions * Update: Env var name Update: stitch functions slightly * Add: Sparsifyml to dependencies in setup.py * Style: Fixes * Some more fixers * OLD IC integration working * Run Integration Tests only when sparsifyml installed * Fix yolov5 integration * Propagate student args to teacher * Update teacher kwargs only when key not present for safety * Updated: integration_test * Updated: num trials to 2 * Fix: failing GHA * make sparsifyml optional implement own strtobool function * [Create] alpha implementation (#181) * [Create] alpha implementation * Apply suggestions from code review * Apply suggestions from code review Co-authored-by: corey-nm <109536191+corey-nm@users.noreply.github.com> --------- Co-authored-by: corey-nm 
<109536191+corey-nm@users.noreply.github.com> * Adding one shot cli (#184) * [Feature branch] standard clis (#187) * Adding skeleton clis * [CLI standardization] sparsify.run one-shot impl (#188) * [CLI standardization] sparsify.run one-shot impl * Fixing one-shot cli --------- Co-authored-by: Corey Lowman * [WIP][CLI standardization] sparsify.run training-aware and spares-transfer initial impl (#189) * [CLI standardization] sparsify.run one-shot impl * [WIP][CLI standardization] sparsify.run training-aware and spares-transfer initial impl * Fixing training-aware/sparse-transfer --------- Co-authored-by: Corey Lowman * Adding docstring to sparsify.run * Moving use case to top arg * Removing apply/init --------- Co-authored-by: Benjamin Fineran * Style changes for sparsify.alpha (#194) * Update: Minimum supported Python Version to `3.7` as it's consistent with our other repos (#193) * [Add] `sparsify.login` CLI and function (#180) * Adding sparsify.login entrypoint and function * Adding docstring to exception * Adding pip install of sparsifyml * Respond to review * Adding help message at top * Adding setup python to workflow * Adding checked sparsifyml import * Apply suggestions from code review Co-authored-by: Danny Guinther * check against major minor version only * add client_id and other bug fixes * Fix: `--index` --> `--index-url` * Update install command missed during rebase * * Clean up code * Remove Global variables * Update PyPi Server link * Add Logging * Move exceptions to their own file * Style fixes * Apply suggestions from code review Add: suggestion from @KSGulin Co-authored-by: Konstantin Gulin <66528950+KSGulin@users.noreply.github.com> * Update src/sparsify/login.py Co-authored-by: Konstantin Gulin <66528950+KSGulin@users.noreply.github.com> * remove comment --------- Co-authored-by: Benjamin Fineran Co-authored-by: Danny Guinther Co-authored-by: Benjamin Co-authored-by: rahul-tuli Co-authored-by: Konstantin Gulin 
<66528950+KSGulin@users.noreply.github.com> * training aware and sparse transfer run mode support (#191) * add sparsifyml dependencies to sparsify install (#195) * update task registry + generalize matching (#201) * rename performance to optim-level in legacy auto api (#199) * [sparsify.run one-shot] CLI propagation of recipe_args (#198) * Remove hardware optimization options (#200) * Remove hardware optimization options * Rename instead of remove optim_level * Add OPTIM_LEVEL back to all list * simple fixes in initial one-shot testing flow (#206) * fixes for initial E2E runs of sparse transfer and training aware (#207) * fixes for initial E2E runs of sparse transfer and training aware * quality * [Alpha] Rework Auto main script into Training-Aware and Sparse-Transfer script (#208) * Initial scratch work * Complete, but untested implementation * Working yolov5 * Working across all integrations * IC path fix * Require model * Remove debug adds * make API KEY an argument (#211) * Update integration and unit tests (#214) * Update integration and unit tests * Update IC base test model * Add login step to test setup (#216) * bump up version to 1.6.0 (#215) (#218) Co-authored-by: dhuang (cherry picked from commit 699a47648a819678cd2e88075c934e4a96eb316e) Co-authored-by: dhuangnm <74931910+dhuangnm@users.noreply.github.com> * [BugFixes] Fix failing tests in `sparsify.alpha` (#223) * Intermediate commit should be amended * Remove failing test as synced with @KSGulin * Explicitly pin protobuff depencies. 
(#225) * Default num_samples to None (#227) * remove legacy UI cmds from `make build` (#229) * Remove dev print statements from IC runner (#231) * Remove dev print statements * Remove logger * Fix incomplete wheel build (#232) * Fix incomplete wheel build * Add license string * Add environment hecks * Address review comments * Catch generic Exception * signal test --------- Co-authored-by: Rahul Tuli Co-authored-by: dhuangnm <74931910+dhuangnm@users.noreply.github.com> Co-authored-by: dhuang Co-authored-by: Benjamin Fineran Co-authored-by: corey-nm <109536191+corey-nm@users.noreply.github.com> Co-authored-by: Danny Guinther Co-authored-by: Benjamin --- setup.py | 1 + src/sparsify/check_environment/__init__.py | 20 ++ src/sparsify/check_environment/gpu_device.py | 39 ++++ src/sparsify/check_environment/main.py | 26 +++ src/sparsify/check_environment/ort_health.py | 179 ++++++++++++++++++ .../check_environment/pathway_checks.py | 35 ++++ src/sparsify/cli/run.py | 7 + 7 files changed, 307 insertions(+) create mode 100644 src/sparsify/check_environment/__init__.py create mode 100644 src/sparsify/check_environment/gpu_device.py create mode 100644 src/sparsify/check_environment/main.py create mode 100644 src/sparsify/check_environment/ort_health.py create mode 100644 src/sparsify/check_environment/pathway_checks.py diff --git a/setup.py b/setup.py index 4bb6efc2..ec578924 100644 --- a/setup.py +++ b/setup.py @@ -81,6 +81,7 @@ def _setup_entry_points() -> Dict: "console_scripts": [ "sparsify.run=sparsify.cli.run:main", "sparsify.login=sparsify.login:main", + "sparsify.check_environment=sparsify.check_environment.main:main", ] } diff --git a/src/sparsify/check_environment/__init__.py b/src/sparsify/check_environment/__init__.py new file mode 100644 index 00000000..05666a99 --- /dev/null +++ b/src/sparsify/check_environment/__init__.py @@ -0,0 +1,20 @@ +# Copyright (c) 2021 - present / Neuralmagic, Inc. All Rights Reserved. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# flake8: noqa +# isort: skip_file + +from .gpu_device import * +from .ort_health import * +from .pathway_checks import * diff --git a/src/sparsify/check_environment/gpu_device.py b/src/sparsify/check_environment/gpu_device.py new file mode 100644 index 00000000..e7d57540 --- /dev/null +++ b/src/sparsify/check_environment/gpu_device.py @@ -0,0 +1,39 @@ +# Copyright (c) 2021 - present / Neuralmagic, Inc. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import logging + +import torch + + +_LOGGER = logging.getLogger(__name__) + +__all__ = ["check_for_gpu"] + + +def check_for_gpu(): + """ + Check for GPU and warn if not found + """ + _LOGGER.warning("Checking for GPU...") + if not torch.cuda.is_available(): + _LOGGER.warn( + "*************************** NO GPU DETECTED ***************************\n" + "No GPU(s) detected on machine. 
The use of a GPU for training-aware " + "sparsification, sparse-transfer learning, and one-shot sparsification is " + "highly recommended.\n" + "************************************************************************" + ) + else: + _LOGGER.warning("GPU check completed successfully") diff --git a/src/sparsify/check_environment/main.py b/src/sparsify/check_environment/main.py new file mode 100644 index 00000000..0e88906f --- /dev/null +++ b/src/sparsify/check_environment/main.py @@ -0,0 +1,26 @@ +# Copyright (c) 2021 - present / Neuralmagic, Inc. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from sparsify.check_environment import check_for_gpu, check_ort_health + + +def main(): + """ + Check the environment for compatibility with the sparsifyml package + """ + check_for_gpu() + check_ort_health() + + +if __name__ == "__main__": + main() diff --git a/src/sparsify/check_environment/ort_health.py b/src/sparsify/check_environment/ort_health.py new file mode 100644 index 00000000..e7a2cda5 --- /dev/null +++ b/src/sparsify/check_environment/ort_health.py @@ -0,0 +1,179 @@ +# Copyright (c) 2021 - present / Neuralmagic, Inc. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +import logging +import signal +from typing import List, Optional + +import numpy +import torch +from onnx import TensorProto, helper + +import onnxruntime as ort +from deepsparse.utils import generate_random_inputs, get_input_names +from sparsifyml.one_shot.utils import run_onnx_model + + +__all__ = ["check_ort_health"] + +_LOGGER = logging.getLogger(__name__) + + +CUDA_HELP_STRING = ( + "If you would like to run on GPU, please ensure that your CUDA and cuDNN " + "versions are compatible with the installed version of onnxruntime-gpu: " + "https://onnxruntime.ai/docs/execution-providers/CUDA-ExecutionProvider.html#requirements" # noqa: E501 +) + + +def _create_simple_conv_graph( + image_pixels_side: int = 32, + channel_count: int = 3, + batch_size: int = 1, + kernel_size: int = 3, + kernel_count: int = 10, +): + feature_size_side = image_pixels_side - kernel_size + 1 + + # The inputs and outputs + X = helper.make_tensor_value_info( + "X", + TensorProto.FLOAT, + [batch_size, channel_count, image_pixels_side, image_pixels_side], + ) + Y = helper.make_tensor_value_info( + "Y", + TensorProto.FLOAT, + [batch_size, kernel_count, feature_size_side, feature_size_side], + ) + + # Create nodes for Conv, Relu, Flatten, and Gemm (Fully Connected) operations + conv_node = helper.make_node( + "Conv", + inputs=["X", "conv_weight", "conv_bias"], + outputs=["conv_result"], + kernel_shape=[kernel_size, kernel_size], + ) + + relu_node1 = helper.make_node( + "Relu", + inputs=["conv_result"], + outputs=["Y"], + ) + + # Define the weights for the Conv and Gemm layers + 
conv_weight = helper.make_tensor( + "conv_weight", + TensorProto.FLOAT, + [kernel_count, channel_count, kernel_size, kernel_size], + numpy.random.randn(kernel_count, channel_count, kernel_size, kernel_size), + ) + conv_bias = helper.make_tensor( + "conv_bias", TensorProto.FLOAT, [kernel_count], numpy.random.randn(kernel_count) + ) + + # Create the graph (model) + + graph_def = helper.make_graph( + [conv_node, relu_node1], + "SimpleCNN", + inputs=[X], + outputs=[Y], + initializer=[conv_weight, conv_bias], + ) + + return helper.make_model(graph_def, producer_name="onnx-example") + + +def check_ort_health(providers: Optional[List[str]] = None): + """ + Checks that the model can be executed with the set providers + + :param model: model to check + :param providers: list of providers use for ORT execution + """ + _LOGGER.warning("Checking onnxruntime-gpu environment health...") + + model = _create_simple_conv_graph() + + providers = ( + ["CUDAExecutionProvider"] + if torch.cuda.is_available() + else ["CPUExecutionProvider"] + ) + + # If cuda device found by torch, ensure it's found by ORT as well + if ort.get_device() != "GPU" and "CUDAExecutionProvider" in providers: + raise RuntimeError( + "CUDA enabled device detected on your machine, but is not detected by " + "onnxruntime. If you would like to run on CPU, please set " + "CUDA_VISIBLE_DEVICES=-1. Note that this is likely to slow down model " + f"compression significantly. 
{CUDA_HELP_STRING}" + ) + + # Ensure that ORT can execute the model + random_input = { + input_name: input + for input_name, input in zip( + get_input_names(model), generate_random_inputs(model) + ) + } + + # Define a custom exception and signal handler + class _TerminationSignal(Exception): + pass + + def handle_termination_signal(signum, frame): + raise _TerminationSignal("Termination signal received") + + # Register the signal handler for SIGTERM and SIGINT signals + signal.signal(signal.SIGTERM, handle_termination_signal) + signal.signal(signal.SIGINT, handle_termination_signal) + + try: + run_onnx_model( + model=model, + input_batch=random_input, + providers=providers, + ) + except _TerminationSignal as ts: + print("Termination signal caught:", ts) + except Exception as e: + # If run fails, try again with CPU only to ensure this is a CUDA environment + # issue + if providers != ["CPUExecutionProvider"]: + try: + run_onnx_model( + model=model, + input_batch=random_input, + providers=["CPUExecutionProvider"], + ) + + raise RuntimeError( + "ONNXRuntime execution failed with CUDAExecutionProvider" + "but succeeded with CPUExecutionProvider. This is indicative" + f"of a likely issue with nnxruntime-gpu install {CUDA_HELP_STRING}" + ) from e + + except RuntimeError: + pass + + raise RuntimeError( + "ONNXRuntime execution failed with both CUDAExecutionProvider and " + "CPUExecutionProvider. Ensure that onnxruntime-gpu and its dependencies " + "are properly installed." + ) from e + + _LOGGER.warning("onnxruntime-gpu environment check completed successfully") diff --git a/src/sparsify/check_environment/pathway_checks.py b/src/sparsify/check_environment/pathway_checks.py new file mode 100644 index 00000000..2afb6e64 --- /dev/null +++ b/src/sparsify/check_environment/pathway_checks.py @@ -0,0 +1,35 @@ +# Copyright (c) 2021 - present / Neuralmagic, Inc. All Rights Reserved. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +from sparsify.check_environment import check_for_gpu, check_ort_health + + +__all__ = ["one_shot_checks", "auto_checks"] + + +def one_shot_checks(): + """ + Check environment for compatibility with one-shot sparsification + """ + check_for_gpu() + check_ort_health() + + +def auto_checks(): + """ + Check environment for compatibility with training-aware sparsification and + sparse-transfer learning + """ + check_for_gpu() diff --git a/src/sparsify/cli/run.py b/src/sparsify/cli/run.py index 68e8fe29..230fb6a9 100644 --- a/src/sparsify/cli/run.py +++ b/src/sparsify/cli/run.py @@ -18,6 +18,7 @@ import click from sparsezoo import Model +from sparsify.check_environment import auto_checks, one_shot_checks from sparsify.cli import opts @@ -45,6 +46,8 @@ def one_shot(**kwargs): # raises exception if sparsifyml not installed from sparsify.one_shot import one_shot + one_shot_checks() + recipe_args = kwargs.get("recipe_args") if isinstance(recipe_args, str): recipe_args = json.loads(recipe_args) @@ -75,6 +78,8 @@ def sparse_transfer(**kwargs): """ from sparsify import auto + auto_checks() + # recipe arg should be a sparse transfer recipe auto.main(_parse_run_args_to_auto(sparse_transfer=True, **kwargs)) @@ -92,6 +97,8 @@ def training_aware(**kwargs): """ from sparsify import auto + auto_checks() + # recipe arg should be a training aware recipe auto.main(_parse_run_args_to_auto(sparse_transfer=False, **kwargs)) From 
7cecd81d814e9a00fc56b640a90d5d74f6c3aa66 Mon Sep 17 00:00:00 2001 From: Konstantin Gulin <66528950+KSGulin@users.noreply.github.com> Date: Fri, 7 Jul 2023 12:00:57 -0700 Subject: [PATCH 09/47] Turn QAT folding on by default (#252) --- src/sparsify/auto/tasks/image_classification/args.py | 7 +++++++ src/sparsify/auto/tasks/transformers/args.py | 2 +- 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/src/sparsify/auto/tasks/image_classification/args.py b/src/sparsify/auto/tasks/image_classification/args.py index 86b59013..a53c5896 100644 --- a/src/sparsify/auto/tasks/image_classification/args.py +++ b/src/sparsify/auto/tasks/image_classification/args.py @@ -158,3 +158,10 @@ class ImageClassificationExportArgs(_ImageClassificationBaseArgs): num_classes: Optional[int] = Field( default=None, description="number of classes for model load/export" ) + convert_qat: bool = Field( + default=True, + description=( + "if True, exports of torch QAT graphs will be converted to a fully " + "quantized representation. Default is True" + ), + ) diff --git a/src/sparsify/auto/tasks/transformers/args.py b/src/sparsify/auto/tasks/transformers/args.py index 3ae4ed62..268cddb3 100644 --- a/src/sparsify/auto/tasks/transformers/args.py +++ b/src/sparsify/auto/tasks/transformers/args.py @@ -697,7 +697,7 @@ class TransformersExportArgs(BaseArgs): description="Sequence length to use. Default is 384. 
Can be overwritten later", ) no_convert_qat: bool = Field( - default=True, + default=False, description=( "Set flag to not perform QAT to fully quantized conversion after export" ), From b0a4584682e813bd52c7a7d50d00eb405497b759 Mon Sep 17 00:00:00 2001 From: Dipika Sikka Date: Fri, 7 Jul 2023 16:01:13 -0400 Subject: [PATCH 10/47] Parse out the `data` arg for transformer workflows to populate train/test/validation kwargs (#251) * add functionality to parse directories: * update docstring: * testing fix * typo fix * add a series of warnings and errors if incorrect files or multiple files are found --- .../auto/tasks/transformers/runner.py | 83 ++++++++++++++++++- 1 file changed, 81 insertions(+), 2 deletions(-) diff --git a/src/sparsify/auto/tasks/transformers/runner.py b/src/sparsify/auto/tasks/transformers/runner.py index 2a7451e6..3a286013 100644 --- a/src/sparsify/auto/tasks/transformers/runner.py +++ b/src/sparsify/auto/tasks/transformers/runner.py @@ -15,7 +15,9 @@ import json import math import os -from typing import Tuple +import re +import warnings +from typing import Tuple, Union import onnx @@ -67,11 +69,14 @@ def config_to_args( :param config: training config to generate run for :return: tuple of training and export arguments """ + dataset, data_file_args = cls.parse_data_args(config.dataset) + config.kwargs.update(data_file_args) + train_args = cls.train_args_class( model_name_or_path=config.base_model, recipe=config.recipe, recipe_args=config.recipe_args, - dataset_name=config.dataset, + dataset_name=dataset, distill_teacher=config.distill_teacher if not config.distill_teacher == "off" else "disable", @@ -84,6 +89,80 @@ def config_to_args( return train_args, export_args + @classmethod + def parse_data_args(cls, dataset: str) -> Tuple[Union[str, None], dict]: + """ + Check if the dataset provided is a data directory. If it is, update the train, + test and validation file arguments with the approriate filepaths. 
This function + assumes any file containing the substrings "train", "test", or "val" are the + data files expected to be used. Duplicates will be updated to only use one file + path. Also, existing kwargs for train, test and validation files will be + overwritten if directory is provided. + + Example directory structure: + - data_for_training/ + - some_train_file.json + - some_validation_file.json + - test_dir/ + - some_test_file.json + + :params dataset: inputted data string arg. Assumed to either be a dataset which + can be downloaded publicly or a locally available directory containing + data files. + + :returns: updated dataset, train_file, test_file, and validation_file args + """ + data_file_args = {} + + def _check_and_update_file(root: str, current_file: str, file_type: str): + split_type = file_type.split("_")[0] + + if data_file_args.get(file_type, None): + warnings.warn( + f"A {split_type} file was already found with name " + f"{data_file_args[file_type]}. Updating with {current_file} " + ) + + if not current_file.lower().endswith(("json", "csv")): + warnings.warn( + f"Found {split_type} file named {current_file} " + "with incorrect file type (expected: json or csv). Skipping file." + ) + else: + data_file_args[file_type] = os.path.join(root, current_file) + + if os.path.isdir(dataset): + for root, _, files in os.walk(dataset): + for f in files: + if re.search(r"train", f): + _check_and_update_file(root, f, "train_file") + elif re.search(r"val", f): + _check_and_update_file(root, f, "validation_file") + elif re.search(r"test", f): + _check_and_update_file(root, f, "test_file") + + if ( + data_file_args.get("train_file", None) + and data_file_args.get("validation_file", None) + and data_file_args.get("test_file", None) + ): + break + + if not ( + data_file_args.get("train_file", None) + and data_file_args.get("validation_file", None) + ): + raise Exception( + "No training or validation files found. 
Be sure the " + "directory provided to the data arg contains json or csv " + "files with the train and val substrings in the filenames." + ) + + if data_file_args: + dataset = None + + return dataset, data_file_args + def tune_args_for_hardware(self, hardware_specs: HardwareSpecs): """ Update run args based on detected hardware specifications From d2d2bd58cad0ff614b3c8707f80b787faa168ed7 Mon Sep 17 00:00:00 2001 From: Benjamin Fineran Date: Fri, 7 Jul 2023 16:20:09 -0400 Subject: [PATCH 11/47] [transformers] unwrap GLUE dataset alias for classification sparse transfer+training aware (#253) * [transformers] unwrap GLUE dataset alias for classification sparse transfer+training aware * quality --- setup.py | 1 - .../auto/tasks/transformers/runner.py | 30 +++++++++++++++++-- 2 files changed, 28 insertions(+), 3 deletions(-) diff --git a/setup.py b/setup.py index ec578924..83cd1d8d 100644 --- a/setup.py +++ b/setup.py @@ -39,7 +39,6 @@ "setuptools>=56.0.0", "optuna>=3.0.2", "onnxruntime-gpu", - "protobuf<=3.20.1,>=3.12.2", ] _nm_deps = [ f"{'sparsezoo' if is_release else 'sparsezoo-nightly'}~={version_nm_deps}", diff --git a/src/sparsify/auto/tasks/transformers/runner.py b/src/sparsify/auto/tasks/transformers/runner.py index 3a286013..20f8a154 100644 --- a/src/sparsify/auto/tasks/transformers/runner.py +++ b/src/sparsify/auto/tasks/transformers/runner.py @@ -69,14 +69,23 @@ def config_to_args( :param config: training config to generate run for :return: tuple of training and export arguments """ - dataset, data_file_args = cls.parse_data_args(config.dataset) + dataset_name, data_file_args = cls.parse_data_args(config.dataset) config.kwargs.update(data_file_args) + if config.task == TASK_REGISTRY.get("text_classification") and ( + dataset_name in _GLUE_TASK_NAMES + ): + # text classification GLUE datasets need special treatment + # since the proper dataset names are set as "task" with + # the top level dataset as "glue" + config.kwargs["task_name"] = dataset_name + 
dataset_name = "glue" + train_args = cls.train_args_class( model_name_or_path=config.base_model, recipe=config.recipe, recipe_args=config.recipe_args, - dataset_name=dataset, + dataset_name=dataset_name, distill_teacher=config.distill_teacher if not config.distill_teacher == "off" else "disable", @@ -330,6 +339,23 @@ class QuestionAnsweringRunner(_TransformersRunner): } +# https://huggingface.co/datasets/glue +_GLUE_TASK_NAMES = { + "ax", + "cola", + "mnli", + "mnli_matched", + "mnli_mismatched", + "mrpc", + "qnli", + "qqp", + "rte", + "sst2", + "stsb", + "wnli", +} + + def _load_model_on_task(model_name_or_path, model_type, task, **model_kwargs): load_funcs = { "masked_language_modeling": SparseAutoModel.masked_language_modeling_from_pretrained, # noqa From 428e0e35641c57077c99b9a31e37ff7d732a676e Mon Sep 17 00:00:00 2001 From: Benjamin Fineran Date: Fri, 7 Jul 2023 17:00:53 -0400 Subject: [PATCH 12/47] [one shot] allow custom use cases with warning (#254) --- src/sparsify/cli/opts.py | 48 ++++++++++++++++++++++++++-------------- src/sparsify/cli/run.py | 6 ++--- 2 files changed, 35 insertions(+), 19 deletions(-) diff --git a/src/sparsify/cli/opts.py b/src/sparsify/cli/opts.py index d3dc7599..9efc78e0 100644 --- a/src/sparsify/cli/opts.py +++ b/src/sparsify/cli/opts.py @@ -12,7 +12,9 @@ # See the License for the specific language governing permissions and # limitations under the License. 
+import logging import os +from functools import partial import click from sparsify.utils.constants import TASK_REGISTRY @@ -20,7 +22,6 @@ __all__ = [ "EXPERIMENT_TYPE", - "USE_CASE", "PROJECT_ID", "EXPERIMENT_ID", "WORKING_DIR", @@ -42,12 +43,14 @@ "add_optim_opts", ] +_LOGGER = logging.getLogger(__name__) + _EXPERIMENT_TYPES = ["sparse-transfer", "one-shot", "training-aware"] _EVAL_METRICS = ["accuracy", "mAP", "recall", "f1"] # TODO: add back kl _DEPLOY_ENGINES = ["deepsparse", "onnxruntime"] -def validate_use_case(ctx, param, value): +def validate_use_case(ctx, param, value, strict: bool = True): # click validator for --use-case # task_name: TaskName @@ -55,9 +58,18 @@ def validate_use_case(ctx, param, value): # TaskName __eq__ matches against aliases and str standardization if value == task_name: return value - raise ValueError( - f"Unknown use-case {value}, supported use cases: {list(TASK_REGISTRY.keys())}" - ) + + if strict: + raise ValueError( + f"Unknown use-case {value}, supported use cases: " + f"{list(TASK_REGISTRY.keys())}" + ) + else: + _LOGGER.warning( + f"Unknown use-case {value}, full feature set may not be availble for " + "custom use cases" + ) + return value EXPERIMENT_TYPE = click.option( @@ -66,13 +78,6 @@ def validate_use_case(ctx, param, value): type=click.Choice(_EXPERIMENT_TYPES, case_sensitive=False), help="The type of the experiment to run", ) -USE_CASE = click.option( - "--use-case", - required=True, - type=str, - callback=validate_use_case, - help="The task this model is for", -) PROJECT_ID = click.option( "--project-id", default=None, @@ -153,10 +158,21 @@ def validate_use_case(ctx, param, value): TRAIN_KWARGS = click.option("--train-kwargs", default=None, type=str) -def add_info_opts(f): - for fn in [WORKING_DIR, EXPERIMENT_ID, PROJECT_ID, USE_CASE]: - f = fn(f) - return f +def add_info_opts(*, require_known_use_case=True): + use_case = click.option( + "--use-case", + required=True, + type=str, + callback=partial(validate_use_case, 
strict=require_known_use_case), + help="The task this model is for", + ) + + def wrapped(f): + for fn in [WORKING_DIR, EXPERIMENT_ID, PROJECT_ID, use_case]: + f = fn(f) + return f + + return wrapped def add_model_opts(*, require_model: bool, include_optimizer: bool = False): diff --git a/src/sparsify/cli/run.py b/src/sparsify/cli/run.py index 230fb6a9..326de945 100644 --- a/src/sparsify/cli/run.py +++ b/src/sparsify/cli/run.py @@ -34,7 +34,7 @@ def main(): @main.command() -@opts.add_info_opts +@opts.add_info_opts(require_known_use_case=False) @opts.add_model_opts(require_model=True) @opts.add_data_opts @opts.add_deploy_opts @@ -66,7 +66,7 @@ def one_shot(**kwargs): @main.command() -@opts.add_info_opts +@opts.add_info_opts(require_known_use_case=True) @opts.add_model_opts(require_model=False) @opts.add_data_opts @opts.add_deploy_opts @@ -85,7 +85,7 @@ def sparse_transfer(**kwargs): @main.command() -@opts.add_info_opts +@opts.add_info_opts(require_known_use_case=True) @opts.add_model_opts(require_model=True) @opts.add_data_opts @opts.add_deploy_opts From 6b2bef71c1429b8ec376fa3c211af7291e9190b0 Mon Sep 17 00:00:00 2001 From: Benjamin Fineran Date: Fri, 7 Jul 2023 17:25:19 -0400 Subject: [PATCH 13/47] Wrap sparsifyml import error with information about python3-dev requirement (#255) --- src/sparsify/login.py | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/src/sparsify/login.py b/src/sparsify/login.py index ff212dff..687ad733 100644 --- a/src/sparsify/login.py +++ b/src/sparsify/login.py @@ -99,7 +99,16 @@ def install_sparsifyml(access_token: str) -> None: :param access_token: The access token to use for authentication """ sparsifyml_spec = importlib.util.find_spec("sparsifyml") - sparsifyml = importlib.import_module("sparsifyml") if sparsifyml_spec else None + + try: + sparsifyml = importlib.import_module("sparsifyml") if sparsifyml_spec else None + except ImportError as sparsifyml_import_error: + raise RuntimeError( + "sparsifyml 
installation detected in current environment, but an " + "exception was raised on import. ensure python3-dev is installed " + "for your python version and the `libpython` executable is available then " + f"re-run sparsify.login.\n\n{sparsifyml_import_error}" + ) sparsifyml_installed = ( sparsifyml_spec is not None From 3f61cab2c5469e8e3269b410303a5c4e1c39e352 Mon Sep 17 00:00:00 2001 From: Rahul Tuli Date: Mon, 10 Jul 2023 17:58:11 -0400 Subject: [PATCH 14/47] Fix Sparsifyml not installed prompt (#256) * replace sparsifyml import by an authenticated import to raise informative error * Quality --- src/sparsify/check_environment/ort_health.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/sparsify/check_environment/ort_health.py b/src/sparsify/check_environment/ort_health.py index e7a2cda5..03ad9f9f 100644 --- a/src/sparsify/check_environment/ort_health.py +++ b/src/sparsify/check_environment/ort_health.py @@ -23,7 +23,11 @@ import onnxruntime as ort from deepsparse.utils import generate_random_inputs, get_input_names -from sparsifyml.one_shot.utils import run_onnx_model +from sparsify.login import import_sparsifyml_authenticated + + +import_sparsifyml_authenticated() +from sparsifyml.one_shot.utils import run_onnx_model # noqa: E402 __all__ = ["check_ort_health"] From c540cce89beb16fe944cb68aee7c4fe6787e2bd3 Mon Sep 17 00:00:00 2001 From: Robert Shaw <114415538+rsnm2@users.noreply.github.com> Date: Tue, 11 Jul 2023 13:55:46 -0400 Subject: [PATCH 15/47] Update datasets-guide.md (#245) Fixed to properly save numpy for transformers --- docs/datasets-guide.md | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/docs/datasets-guide.md b/docs/datasets-guide.md index 6c68115e..49e6a9f5 100644 --- a/docs/datasets-guide.md +++ b/docs/datasets-guide.md @@ -199,37 +199,37 @@ class NumpyExportWrapper(torch.nn.Module): self.model = model self.model.eval() # Set model to evaluation mode self.numpy_data = [] - + def 
forward(self, *args, **kwargs): with torch.no_grad(): inputs = {} batch_size = 0 - + for index, arg in enumerate(args): if isinstance(arg, Tensor): inputs[f"input_{index}"] = arg batch_size = arg.size[0] - + for key, val in kwargs.items(): if isinstance(val, Tensor): inputs[key] = val - batch_size = arg.size[0] - + batch_size = val.shape[0] + start_index = len(self.numpy_data) for _ in range(batch_size): self.numpy_data.append({}) - - for index, (input_key, input_batch) in enumerate(inputs): - for input_ in input_batch: - self.numpy_data[start_index + index][input_key] = input_ - + + for input_key in iter(inputs): + for idx, input in enumerate(inputs[input_key]): + self.numpy_data[start_index+idx][input_key] = input + return self.model(*args, **kwargs) def save(self, path: str = "data"): for index, item in enumerate(self.numpy_data): npz_file_path = f'{path}/input{str(index).zfill(4)}.npz' np.savez(npz_file_path, **item) - + print(f'Saved {len(self.numpy_data)} npz files to {path}') model = NumpyExportWrapper(YOUR_MODEL) From 43c006160be15ebd3a77ecca4f107ba33ba94bbb Mon Sep 17 00:00:00 2001 From: Jeannie Finks <74554921+jeanniefinks@users.noreply.github.com> Date: Tue, 11 Jul 2023 14:01:35 -0400 Subject: [PATCH 16/47] Jfinks readme alpha (#247) * Update README.md Streamlining README; making progressive edits before final reviews * fix merge conflicts * Update README.md * Update README.md * Update README.md * Update README.md * Update README.md Added in # links to overivew, should work I think? hard to test before committing. * Update README.md * Update README.md * Update README.md Added in note on logging in first before trying to run exp. * Update README.md * Update README.md * Update README.md Running Experiments rewrite * Update README.md * Update README.md TOC updates * Update README.md First near-final pass * Update README.md near-final draft, part 2 * Update README.md - First phase of README will be Quick Start Alpha. 
Focus user on 6-7 steps we want to accomplish. - Next phase: README will evolve into BETA content. - Eventual README will be GA content. * Update README.md formatting nit * Update README.md formatting nit fix * Update README.md - Update Readme docs with path/to/dataset * Revert "Update README.md - Update Readme docs with path/to/dataset" This reverts commit 639bbcde44aa426ef84aa08a03ce98d7ab955f88. * Update README.md - changed install to nightly * Update cli-api-guide.md - updated install to nightly * Update cloud-user-guide.md - updated install to nightly --------- Co-authored-by: Mark Kurtz Co-authored-by: Rob Greenberg <100797996+rgreenberg1@users.noreply.github.com> Co-authored-by: Rahul Tuli --- README.md | 263 +++++++++++++++++++++++---------------- docs/cli-api-guide.md | 2 +- docs/cloud-user-guide.md | 2 +- 3 files changed, 155 insertions(+), 112 deletions(-) diff --git a/README.md b/README.md index cd557988..c63d80a6 100644 --- a/README.md +++ b/README.md @@ -31,9 +31,9 @@ See the License for the specific language governing permissions and limitations under the License. --> -

tool icon  Sparsify [Alpha]

+

Sparsify tool icon  Sparsify [Alpha]

-

ML model optimization product to accelerate inference.

+

ML model optimization product to accelerate inference

@@ -73,115 +73,119 @@ limitations under the License. ![Logo](https://drive.google.com/uc?id=1XnlBKpRQdsnLC4IPoiCoihXJNFh8y7OL) -## Overview +# Welcome to the Sparsify Alpha Quick Start -Sparsify enables you to accelerate inference without sacrificing accuracy by applying state-of-the-art pruning, quantization, and distillation algorithms to Neural Networks with a simple web app and one-command API calls. +## Introduction +🚨 **July 2023: Sparsify's next generation is now in alpha as of version 1.6.0!** -To empower you in compressing models, Sparsify is made up of two components: the Sparsify Cloud and the Sparsify CLI/API. -The Sparsify Cloud is a web application that allows you to create and manage Sparsify Experiments, explore hyperparameters, predict performance, and compare results across both Experiments and deployment scenarios. -The Sparsify CLI/API is a Python package that allows you to run Sparsify Experiments locally, sync with the Sparsify Cloud, and integrate into your own workflows. +This quick start provides a brief overview of Sparsify and then details several pathways you can work through. We encourage you to explore each for Sparsify's full benefits. As of this update, support for [Sparsify's first generation](https://docs.neuralmagic.com/sparsify) has been deprecated. We highly recommend you try the alpha to get a sneak peek and influence the product's development process. -To get started immediately, [create an account](https://account.neuralmagic.com/signup) and then check out the [Installation](#Installation) and [Quick Start](#quick-start) sections of this README. 
-With all of that setup, sparsifying your models is as easy as: +🚨 **Sparsify Alpha Feedback and Support** -```bash -sparsify.run sparse-transfer --use-case image-classification --data imagenette --optim-level 0.5 --train-kwargs '{"dataset": "imagenette"}' -``` +Report UI issues and CLI errors, submit bug reports, and provide general feedback about the product to the team via the [nm-sparsify Slack Channel](https://join.slack.com/t/discuss-neuralmagic/shared_invite/zt-1xkdlzwv9-2rvS6yQcCs7VDNUcWxctnw), or via [GitHub Issues](https://github.com/neuralmagic/sparsify/issues). Alpha support is provided through those channels. -
+🚨 **Sparsify Alpha Terms and Conditions** -*🚨**Note**🚨: Sparsify is currently an alpha release, so you have the opportunity to influence the development process for the product. -You can report UI issues and CLI errors, submit bug reports, and provide general feedback about the product to the team via the [nm-sparsify Slack Channel](https://join.slack.com/t/discuss-neuralmagic/shared_invite/zt-1xkdlzwv9-2rvS6yQcCs7VDNUcWxctnw), [email](mailto::rob@neuralmagic.com) or via [GitHub Issues](https://github.com/neuralmagic/sparsify/issues). -As an alpha release, limited support is provided through the community with GitHub Issues and Slack, APIs and UIs are subject to change, and the product is not yet ready for production use. -Thank you for your interest and support!* +Sparsify Alpha is a pre-release version of Sparsify that is still in active development. The product is not yet ready for production use; APIs and UIs are subject to change. There may be bugs in the Alpha version, which we hope to have fixed before Beta and then a general Q3 2023 release. The feedback you provide on quality and usability helps us identify issues, fix them, and make Sparsify even better. This information is used internally by Neural Magic solely for that purpose. It is not shared or used in any other way. -## Installation +That being said, we are excited to share this release and hear what you think. Thank you in advance for your feedback and interest! -`pip` is the preferred method for installing Sparsify. -It is advised to create a fresh [virtual environment](https://docs.python.org/3/library/venv.html) to avoid dependency issues. +## Overview -Install with pip using: +Sparsify enables you to accelerate inference without sacrificing accuracy by applying state-of-the-art pruning, quantization, and distillation algorithms to neural networks with a simple web application and one-command API calls. 
+ +Sparsify empowers you to compress models through two components: + +* Sparsify Cloud - a web application that allows you to create and manage Sparsify Experiments, explore hyperparameters, predict performance, and compare results across both Experiments and deployment scenarios. +* Sparsify CLI/API - a Python package that allows you to run Sparsify Experiments locally, sync with the Sparsify Cloud, and integrate them into your own workflows. + +In this quick start, you will: + +1. [Verify prerequisites](#step-1-prerequisites). +2. [Create an account](#step-2-account-creation) a Neural Magic Account. +3. [Install](#step-3-installation) Sparsify in your local training environment. +4. [Log in](#step-4-sparsify-login) utilizing your API key. +5. [Run an Experiment](#step-5-running-experiments). + * [Experiments Overview](#experiments-overview) + * [One-Shot Experiments](#one-shot-experiments) + * [Sparse-Transfer Experiments](#sparse-transfer-experiments) + * [Training-Aware Experiments](#training-aware-experiments) + * [Command Syntax and Argument Guides](#command-syntax-and-arguments) + * `EXPERIMENT_TYPE` + * `USE_CASE` [guide](https://github.com/neuralmagic/sparsify/blob/main/docs/use-cases-guide.md) + * `MODEL` [guide](https://github.com/neuralmagic/sparsify/blob/main/docs/models-guide.md) + * `DATA`[guide](https://github.com/neuralmagic/sparsify/blob/main/docs/datasets-guide.md) + * `OPTIM_LEVEL` [guide](https://github.com/neuralmagic/sparsify/blob/main/docs/optim-levels-guide.md) + * [Example Commands by Experiment Type](#example-commands-by-experiment-type) + * [Running One-Shot Experiments](#running-one-shot-experiments) + * [Running Sparse-Transfer Experiments](#running-sparse-transfer-experiments) + * [Running Training-Aware Experiments](#running-training-aware-experiments) +7. [Compare](#step-6-comparing-experiment-results) the Experiment results. +8. [Deploy optimized models](#step-7-deploying-your-model-with-deepsparse) with DeepSpare (optional). 
+ +When you finish this quick start, sparsifying your models is as easy as: ```bash -pip install sparsify -``` +sparsify.run sparse-transfer --use-case image-classification --data imagenette --optim-level 50 --train-kwargs '{"dataset": "imagenette"}' -### Hardware and Software Requirements +``` +## Step 1: Prerequisites +First, verify that you have the correct software and hardware to run the Sparsify Alpha. -Sparsify is tested on Python 3.8 and 3.10, ONNX 1.5.0-1.12.0, ONNX opset version 11+, and manylinux compliant systems. +**Software:** Sparsify is tested on Python 3.8 and 3.10, ONNX 1.5.0-1.12.0, ONNX opset version 11+, and manylinux compliant systems. Sparsify is not supported natively on Windows and MAC OS. -Sparsify requires a GPU with CUDA + CuDNN in order to sparsify neural networks. +**Hardware:** Sparsify requires a GPU with CUDA + CuDNN in order to sparsify neural networks. We recommend you use a Linux system with a GPU that has a minimum of 16GB of GPU Memory, 128GB of RAM, 4 CPU cores, and is CUDA-enabled. If you are sparsifying a very large model, you may need more RAM than the recommended 128GB. -If you encounter issues setting up your training environment, file a GitHub issue [here](https://github.com/neuralmagic/sparsify/issues). - -## Quick Start +If you encounter issues setting up your training environment, [file a GitHub issue](https://github.com/neuralmagic/sparsify/issues). -We'll show you how to: +## Step 2: Account Creation -1. Create a Neural Magic Account. -2. Install Sparsify in your local training environment. -3. Login utilizing your API key. -4. Run an Experiment. -5. Compare the Experiment results. +Creating a new one-time account is simple and free. An account is required to manage your Experiments and API keys. +Visit the [Neural Magic's Web App Platform](https://account.neuralmagic.com/signup) and create an account by entering your email, name, and unique password. 
If you already have a Neural Magic Account, [sign in](https://account.neuralmagic.com/signin) with your email. -### Create a Neural Magic Account +See the [Sparsify Cloud User Guide](https://github.com/neuralmagic/sparsify/blob/main/docs/cloud-user-guide.md) for more details. -Creating a new account is simple and free. -An account is required to manage your Experiments and API keys. -Visit the [Neural Magic's Web App Platform](https://account.neuralmagic.com/signup) and create an account by entering your email, name, and a unique password. -If you already have a Neural Magic Account, [sign in](https://account.neuralmagic.com/signin) with your email. +## Step 3: Installation -For more details, see the [Sparsify Cloud User Guide](https://github.com/neuralmagic/sparsify/blob/main/docs/cloud-user-guide.md). +`pip` is the preferred method for installing Sparsify. It is advised to create a fresh [virtual environment](https://docs.python.org/3/library/venv.html) to avoid dependency issues. -### Install Sparsify - -Next, you'll need to install Sparsify on your training hardware. -To do this, run the following command: +Install with pip using: ```bash -pip install sparsify +pip install sparsify-nightly ``` +## Step 4: Sparsify Login -For more details and system/hardware requirements, see the [Installation](#Installation) section. - -### Login to Sparsify - -With Sparsify installed on your training hardware, you'll need to authorize the local CLI to access your account. -This is done by running the `sparsify.login` command and providing your API key. -Locate your API key on the home page of the [Sparsify Cloud](https://apps.neuralmagic.com/sparsify) under the **'Get set up'** modal. -Once you have located this, copy the command or the API key itself and run the following command: +Next, with Sparsify installed on your training hardware: +1. Authorize the local CLI to access your account by running the `sparsify.login` command and providing your API key. +2. 
Locate your API key on the homepage of the [Sparsify Cloud](https://apps.neuralmagic.com/sparsify) under the **'Get set up'** modal, and copy the command or the API key itself. +3. Run the following command: ```bash sparsify.login API_KEY ```` -For more details on locating the API_KEY, see the [Sparsify Cloud User Guide](https://github.com/neuralmagic/sparsify/blob/main/docs/cloud-user-guide.md). +See the related guides for more details on: +* Locating the API_KEY - [Sparsify Cloud User Guide](https://github.com/neuralmagic/sparsify/blob/main/docs/cloud-user-guide.md). +* Running the `sparsify.login` command - [CLI/API Guide](https://github.com/neuralmagic/sparsify/blob/main/docs/cli-api-guide.md). -For more details on the `sparsify.login` command, see the [CLI/API Guide](https://github.com/neuralmagic/sparsify/blob/main/docs/cli-api-guide.md). +**Note:** Every time you use Sparsify, you will need to log in via the Sparsify CLI so that your local session can sync with your account in the Sparsify Cloud. -### Run an Experiment +## Step 5: Running Experiments -Experiments are the core of sparsifying a model. -They are the process of applying sparsification algorithms in One-Shot, Training-Aware, or Sparse-Transfer to a dataset and model. -All Experiments are run locally on your training hardware and can be synced with the cloud for further analysis and comparison. -To run an Experiment, you can use either the CLI or the API depending on your use case. -The Sparsify Cloud provides a UI for exploring hyperparameters, predicting performance, and generating the desired CLI/API command. -For more info on generating commands from the Sparsify Cloud, see the [Sparsify Cloud User Guide](https://github.com/neuralmagic/sparsify/blob/main/docs/cloud-user-guide.md). +In this section, you will learn about Sparsify Experiments and run an Experiment. -The general command for running an Experiment is: +### Experiments Overview +Experiments are the core of sparsifying a model. 
They allow you to apply sparsification algorithms to a dataset and model through the three Experiment types detailed below: One-Shot, Training-Aware, or Sparse-Transfer. All Experiments are run locally on your training hardware and can be synced with the cloud for further analysis and comparison, using Sparsify's two components: -```bash -sparsify.run EXPERIMENT_TYPE --use-case USE_CASE --model MODEL --data DATA --optim-level OPTIM_LEVEL -``` +* Sparsify Cloud - explore hyperparameters, predict performance, and generate the desired CLI/API command. + * See the [Sparsify Cloud User Guide](https://github.com/neuralmagic/sparsify/blob/main/docs/cloud-user-guide.md) for more details on generating commands from the Sparsify Cloud. + +* Sparsify CLI/API - run an experiment depending on your use case. -Where the values for each of the arguments follow these general rules: -- EXPERIMENT_TYPE: one of `one-shot`, `training-aware`, or `sparse-transfer`; see the examples below for more details or the [CLI/API Guide](https://github.com/neuralmagic/sparsify/blob/main/docs/cli-api-guide.md). -- USE_CASE: the use case you're solving for such as `image-classification`, `object-detection`, `text-classification`, a custom use case, etc. A full list of supported use cases for each Experiment type can be found [here](https://github.com/neuralmagic/sparsify/blob/main/docs/use-cases-guide.md). -- MODEL: the model you want to sparsify which can be a model name such as `resnet50`, a stub from the [SparseZoo](https://sparsezoo.neuralmagic.com), or a path to a local model. For One-Shot, currently the model must be in an ONNX format. For Training-Aware and Sparse-Transfer, the model must be in a PyTorch format. More details on model formats can be found [here](https://github.com/neuralmagic/sparsify/blob/main/docs/models-guide.md). -- DATA: the dataset you want to use to the sparsify the model. This can be a dataset name such as `imagenette` or a path to a local dataset. 
Currently, One-Shot only supports NPZ formatted datasets. Training-Aware and Sparse-Transfer support PyTorch ImageFolder datasets for image classification, YOLOv5/v8 datasets for object detection and segmentation, and HuggingFace datasets for NLP/NLG. More details on dataset formats can be found [here](https://github.com/neuralmagic/sparsify/blob/main/docs/datasets-guide.md). -- OPTIM_LEVEL: the desired sparsification level from 0 (none) to 1 (max). The general rule is that 0 is the baseline model, <0.3 only quantizes the model, 0.3-1.0 increases the sparsity of the model and applies quantization. More details on sparsification levels can be found [here](https://github.com/neuralmagic/sparsify/blob/main/docs/optim-levels-guide.md). +Learn more about the Experiment types and understand which use case might be best for your task. -#### Running One-Shot +#### One-Shot Experiments | Sparsity | Sparsification Speed | Accuracy | |----------|----------------------|----------| @@ -193,17 +197,7 @@ The algorithms are applied to the model post-training utilizing a calibration da Generally, One-Shot Experiments result in a 3-5x speedup with minimal accuracy loss. They are ideal for when you want to quickly sparsify your model and don't have a lot of time to spend on the sparsification process. -CV Example: -```bash -sparsify.run one-shot --use-case image_classification --model resnet50 --data imagenette --optim-level 0.5 -``` - -NLP Example: -```bash -sparsify.run one-shot --use-case text_classification --model bert-base --data sst2 --optim-level 0.5 -``` - -#### Running Sparse-Transfer +#### Sparse-Transfer Experiments | Sparsity | Sparsification Speed | Accuracy | |----------|----------------------|-----------| @@ -217,17 +211,7 @@ Generally, Sparse-Transfer Experiments result in a 5-10x speedup with minimal ac They are ideal when a sparse model already exists for your use case, and you want to quickly utilize it for your dataset. 
Note, the model argument is optional for Sparse-Transfer Experiments as Sparsify will select the best one from the SparseZoo for your use case if not supplied. -CV Example: -```bash -sparsify.run sparse-transfer --use-case image_classification --data imagenette --optim-level 0.5 -``` - -NLP Example: -```bash -sparsify.run sparse-transfer --use-case text_classification --data sst2 --optim-level 0.5 -``` - -#### Running Training-Aware +#### Training-Aware Experiments | Sparsity | Sparsification Speed | Accuracy | |-----------|-----------------------|-----------| @@ -239,28 +223,74 @@ However, they do require additional training time and hyperparameter tuning to a Generally, Training-Aware Experiments result in a 6-12x speedup with minimal accuracy loss. They are ideal when you have the time to train a model, have a custom model, or want to achieve the best possible accuracy. -Note, the model argument is optional for Sparse-Transfer Experiments as Sparsify will select the best one from the SparseZoo for your use case if not supplied. +Note, the model argument is optional for Sparse-Transfer Experiments, as Sparsify will select the best one from the SparseZoo for your use case if not supplied. -CV Example: +### Command Syntax and Arguments +Now that you have learned about Experiments and the various types, you are ready to run an Experiment. +Running Experiments uses the following general command: + +```bash +sparsify.run EXPERIMENT_TYPE --use-case USE_CASE --model MODEL --data DATA --optim-level OPTIM_LEVEL +``` + +The values for each of the arguments follow these general rules: +- **`EXPERIMENT_TYPE`**: one of `one-shot`, `training-aware`, or `sparse-transfer`. +- [[Guide]](https://github.com/neuralmagic/sparsify/blob/main/docs/use-cases-guide.md) **`USE_CASE`**: the use case you're solving for, such as `image-classification`, `object-detection`, `text-classification`, or a custom use case. 
+- [[Guide]](https://github.com/neuralmagic/sparsify/blob/main/docs/models-guide.md) **`MODEL`**: the model you want to sparsify which can be a model name such as `resnet50`, a stub from the [SparseZoo](https://sparsezoo.neuralmagic.com), or a path to a local model. For One-Shot, currently, the model must be in an ONNX format. For Training-Aware and Sparse-Transfer, the model must be in a PyTorch format. +- [[Guide]](https://github.com/neuralmagic/sparsify/blob/main/docs/datasets-guide.md) **`DATA`**: the dataset you want to use to sparsify the model. This can be a dataset name such as `imagenette` or a path to a local dataset. Currently, One-Shot only supports NPZ-formatted datasets. Training-Aware and Sparse-Transfer support PyTorch ImageFolder datasets for image classification, YOLOv5/v8 datasets for object detection and segmentation, and Hugging Face datasets for NLP/NLG. +- [[Guide]](https://github.com/neuralmagic/sparsify/blob/main/docs/optim-levels-guide.md) **`OPTIM_LEVEL`**: the desired sparsification level from 0 (none) to 1 (max). The general rule is that 0 is the baseline model, <0.3 only quantizes the model, 0.3-1.0 increases the sparsity of the model and applies quantization. + +### Example Commands by Experiment Type + +Here are examples you may wish to run; pick your use case and see if you can successfully run your first experiment! +With successful experiments, a `model.onnx` file will be created in your working directory, which will be the optimized model, and you will have no CLI errors. 
+ +#### Running One-Shot Experiments + +Computer Vision: +```bash +sparsify.run one-shot --use-case image_classification --model resnet50 --data imagenette --optim-level 0.5 +``` + +NLP: +```bash +sparsify.run one-shot --use-case text_classification --model bert-base --data sst2 --optim-level 0.5 +``` + +#### Running Sparse-Transfer Experiments + +Computer Vision: +```bash +sparsify.run sparse-transfer --use-case image_classification --data imagenette --optim-level 0.5 +``` + +NLP: +```bash +sparsify.run sparse-transfer --use-case text_classification --data sst2 --optim-level 0.5 +``` + +#### Running Training-Aware Experiments + +Computer Vision: ```bash sparsify.run training-aware --use-case image_classification --model resnet50 --data imagenette --optim-level 0.5 ``` -NLP Example: +NLP: ```bash sparsify.run training-aware --use-case text_classification --model bert-base --data sst2 --optim-level 0.5 ``` -### Compare the Experiment results +## Step 6: Comparing Experiment Results -Once you have run your Experiment, you can compare the results printed out to the console using the `deepsparse.benchmark` command. +Once you have run your Experiment, compare the results printed out to the console using the `deepsparse.benchmark` command. In the near future, you will be able to compare the results in the Cloud, measure other scenarios, and compare the results to other Experiments. -To compare the results of your Experiment with the original dense baseline model, you can use the `deepsparse.benchmark` command with your original model and the new optimized model on your deployment hardware. Models that have been optimized using Sparsify will generally run performantly on DeepSparse, Neural Magic's sparsity-aware CPU inference runtime. +To compare the results of your Experiment with the original dense baseline model, use the `deepsparse.benchmark` command with your original model and the new optimized model on your deployment hardware. 
 Models that have been optimized using Sparsify will generally run performantly on DeepSparse, Neural Magic's sparsity-aware CPU inference runtime.
 
-For more information on benchmarking, see the [DeepSparse Benchmarking User Guide](https://github.com/neuralmagic/deepsparse/blob/main/docs/user-guide/deepsparse-benchmarking.md).
+See the [DeepSparse Benchmarking User Guide](https://github.com/neuralmagic/deepsparse/blob/main/docs/user-guide/deepsparse-benchmarking.md) for more information on benchmarking.
 
 Here is an example of a `deepsparse.benchmark`command:
 
@@ -300,19 +330,32 @@ Iterations: 1346
 
 *Note: performance improvement is not guaranteed across all runtimes and hardware types.*
 
+## Step 7: Deploying Your Model With DeepSparse
+
+As an optional step to this quick start, now that you have your optimized model, you are ready for inferencing. To get the most inference performance out of your optimized model, we recommend you deploy on Neural Magic's [DeepSparse](https://docs.neuralmagic.com/deepsparse). DeepSparse is built to get the best performance out of optimized models on CPUs.
+
+DeepSparse Server takes in a task and a model path and will enable you to serve models and `Pipelines` for deployment in HTTP.
+
+You can deploy any ONNX model using DeepSparse Server by running:
+```bash
+deepsparse.server \
+task question_answering \
+--model_path "zoo:nlp/question_answering/bert-base/pytorch/huggingface/squad/12layer_pruned80_quant-none-vnni"
+```
 
-### Package for Deployment
+To run inference on your own model, change the model path to the location of your `model.onnx` file. Consult the [DeepSparse Server Docs](https://docs.neuralmagic.com/user-guides/deploying-deepsparse/deepsparse-server) for more details.
 
-Landing soon!
+If you're not ready to deploy, congratulations on completing the quick start! We welcome your [Sparsify Alpha feedback and support issues](#feedback-and-support) as described at the beginning of this guide.
-## Resources +# Resources +Now that you have explored the Sparsify Alpha Quick Start, here are other related resources. -### Learning More +## Learning More -- Documentation: [SparseML,](https://docs.neuralmagic.com/sparseml/) [SparseZoo,](https://docs.neuralmagic.com/sparsezoo/) [Sparsify,](https://docs.neuralmagic.com/sparsify/) [DeepSparse](https://docs.neuralmagic.com/deepsparse/) +- Documentation: [SparseML,](https://docs.neuralmagic.com/sparseml/) [SparseZoo,](https://docs.neuralmagic.com/sparsezoo/) [Sparsify (1st Generation),](https://docs.neuralmagic.com/sparsify/) [DeepSparse](https://docs.neuralmagic.com/deepsparse/) - Neural Magic: [Blog,](https://www.neuralmagic.com/blog/) [Resources](https://www.neuralmagic.com/resources/) -### Release History +## Release History Official builds are hosted on PyPI @@ -321,7 +364,7 @@ Official builds are hosted on PyPI Additionally, more information can be found via [GitHub Releases.](https://github.com/neuralmagic/sparsify/releases) -### License +## License The project is licensed under the [Apache License Version 2.0](https://github.com/neuralmagic/sparsify/blob/main/LICENSE). diff --git a/docs/cli-api-guide.md b/docs/cli-api-guide.md index 5ba36c87..1f9e9cdb 100644 --- a/docs/cli-api-guide.md +++ b/docs/cli-api-guide.md @@ -25,7 +25,7 @@ Next, you'll need to install Sparsify on your training hardware. To do this, run the following command: ```bash -pip install sparsify +pip install sparsify-nightly ``` For more details and system/hardware requirements, see the [Installation](https://github.com/neuralmagic/sparsify#installation) section. diff --git a/docs/cloud-user-guide.md b/docs/cloud-user-guide.md index 8e225dd2..a4d12503 100644 --- a/docs/cloud-user-guide.md +++ b/docs/cloud-user-guide.md @@ -42,7 +42,7 @@ Next, you'll need to install Sparsify on your training hardware. 
To do this, run the following command: ```bash -pip install sparsify +pip install sparsify-nightly ``` For more details and system/hardware requirements, see the [Installation](https://github.com/neuralmagic/sparsify/blob/main/README.md#installation) section. From 4ced38d9a1150dda88c5e140146bd60bac8052f1 Mon Sep 17 00:00:00 2001 From: Rahul Tuli Date: Tue, 11 Jul 2023 14:53:11 -0400 Subject: [PATCH 17/47] Fix tensorboard reloading (#258) * Use tensorboard logger for Logging Set the logging level to warnings to suppress constant stream of tensorboard logs * Keep sparsify level logs separate from Tensorboard level logs * remove global state, refactor suppression to a separate function --- src/sparsify/auto/scripts/main.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/src/sparsify/auto/scripts/main.py b/src/sparsify/auto/scripts/main.py index ea46270d..0cadd25c 100644 --- a/src/sparsify/auto/scripts/main.py +++ b/src/sparsify/auto/scripts/main.py @@ -26,6 +26,7 @@ from sparsify.schemas import APIArgs from sparsify.schemas.auto_api import SparsificationTrainingConfig from tensorboard.program import TensorBoard +from tensorboard.util import tb_logging _LOGGER = logging.getLogger("auto_banner") @@ -41,6 +42,8 @@ def main(api_args: APIArgs): deploy_directory, ) = create_save_directory(api_args) + _suppress_tensorboard_logs() + # Launch tensorboard server tensorboard_server = TensorBoard() tensorboard_server.configure(argv=[None, "--logdir", log_directory]) @@ -62,3 +65,10 @@ def main(api_args: APIArgs): runner.create_deployment_directory( train_directory=train_directory, deploy_directory=deploy_directory ) + + +def _suppress_tensorboard_logs(): + # set tensorboard logger to warning level + # avoids a constant stream of logs from tensorboard + tb_logger = tb_logging.get_logger() + tb_logger.setLevel(logging.WARNING) From 9c7d037c09fc7791454febcb1529dab4201468ec Mon Sep 17 00:00:00 2001 From: Dipika Sikka Date: Wed, 12 Jul 2023 11:33:27 -0400 Subject: [PATCH 
18/47] change default cache from ram to disk (#257) --- src/sparsify/auto/tasks/object_detection/yolov5/args.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/sparsify/auto/tasks/object_detection/yolov5/args.py b/src/sparsify/auto/tasks/object_detection/yolov5/args.py index 8d766ef8..802b48ad 100644 --- a/src/sparsify/auto/tasks/object_detection/yolov5/args.py +++ b/src/sparsify/auto/tasks/object_detection/yolov5/args.py @@ -46,7 +46,7 @@ class _Yolov5BaseTrainArgs(BaseArgs): noautoanchor: bool = Field(default=False, description="disable AutoAnchor") bucket: str = Field(default="", description="gsutil bucket") cache: str = Field( - default="ram", description='--cache images in "ram" (default) or "disk"' + default="disk", description='--cache images in "ram" or "disk" (default)' ) image_weights: bool = Field( default=False, description="use weighted image selection for training" From 558bf727b07e6d33b41fbad6f562ca5e244c1ac4 Mon Sep 17 00:00:00 2001 From: Michael Goin Date: Wed, 12 Jul 2023 19:42:00 -0400 Subject: [PATCH 19/47] Update README.md --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index c63d80a6..a65a0515 100644 --- a/README.md +++ b/README.md @@ -126,7 +126,7 @@ In this quick start, you will: When you finish this quick start, sparsifying your models is as easy as: ```bash -sparsify.run sparse-transfer --use-case image-classification --data imagenette --optim-level 50 --train-kwargs '{"dataset": "imagenette"}' +sparsify.run sparse-transfer --use-case image-classification --data imagenette --optim-level 0.5 --train-kwargs '{"dataset": "imagenette"}' ``` ## Step 1: Prerequisites From f85445ef0785f7453c2e9850d6f0e945ec12a96c Mon Sep 17 00:00:00 2001 From: Jeannie Finks <74554921+jeanniefinks@users.noreply.github.com> Date: Wed, 12 Jul 2023 19:52:22 -0400 Subject: [PATCH 20/47] Update use-cases-guide.md grammar nits --- docs/use-cases-guide.md | 22 +++++++++++----------- 1 file 
changed, 11 insertions(+), 11 deletions(-) diff --git a/docs/use-cases-guide.md b/docs/use-cases-guide.md index 51358856..5895cdee 100644 --- a/docs/use-cases-guide.md +++ b/docs/use-cases-guide.md @@ -16,7 +16,7 @@ limitations under the License. # Sparsify Use Cases Guide -To use Sparsify, you must specify a use case for all experiments run. +To use Sparsify, you must specify a use case for all experiments to run. A use case is the specific task or domain/sub-domain you wish to sparsify a model for such as image classification, object detection, or text classification. It is used to enable Sparsify to apply the best sparsification techniques for your use case, to automatically package the model for deployment, and depending on what is run, to load specific pipelines for data loading and training. @@ -32,26 +32,26 @@ The generally supported use cases for Sparsify currently are: - NLP - token classification: `nlp-token_classification` - NLP - named entity recognition: `nlp-named_entity_recognition` -Note, other aliases are recognized for these use cases such as image-classification for cv-classification. +Note, other aliases are recognized for these use cases, such as image-classification for cv-classification. Sparsify will automatically recognize these aliases and apply the correct use case. ### Custom Use Cases If you wish to use Sparsify for a use case that is not in the list of currently supported use cases, you can use a custom use case for some pathways in Sparsify. -The custom use cases will be saved into the Sparsify cloud for future reuse when run through a supported pathway. +The custom use cases will be saved into the Sparsify Cloud for future reuse when run through a supported pathway. The pathways that support custom use cases are listed below. -Note, custom use cases will prevent Sparsify from applying known, domain specific knowledge for sparsification of your model. 
-Additionally, it will prevent autofill of the pre and post processing functions when creating a deployment package. +Note, custom use cases will prevent Sparsify from applying known, domain-specific knowledge for the sparsification of your model. +Additionally, it will prevent autofill of the pre- and post-processing functions when creating a deployment package. -#### One Shot +#### One-Shot -For One Shot experiments, both the CLIs and APIs always support custom use cases. -To utilize, run a one shot experiment with `--use-case` set to the desired custom use case. +For One-Shot Experiments, both the CLIs and APIs always will support custom use cases. +To utilize, run a One-Shot Experiment with `--use-case` set to the desired custom use case. -### Training Aware +### Training-Aware -For Training Aware experiments, custom use cases are only supported with the APIs for custom integrations. -This is because non-custom integrations utilize plugins that corresponding to the appropriate use case for training pipelines. +For Training-Aware Experiments, custom use cases are only supported with the APIs for custom integrations. +This is because non-custom integrations utilize plug-ins that correspond to the appropriate use case for training pipelines. To utilize this, ensure that you have a training pipeline ready to go and inject the Sparsify API into the training pipeline with the desired use case passed in as an argument. More info on this specific pathway will be available in the near future as Sparsify development progresses. 
From 9052b5b86d79f7c4034b31b39a129fba38f922ea Mon Sep 17 00:00:00 2001 From: Jeannie Finks <74554921+jeanniefinks@users.noreply.github.com> Date: Wed, 12 Jul 2023 19:54:03 -0400 Subject: [PATCH 21/47] Update use-cases-guide.md grammar nits --- docs/use-cases-guide.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/use-cases-guide.md b/docs/use-cases-guide.md index 5895cdee..2a55a132 100644 --- a/docs/use-cases-guide.md +++ b/docs/use-cases-guide.md @@ -17,8 +17,8 @@ limitations under the License. # Sparsify Use Cases Guide To use Sparsify, you must specify a use case for all experiments to run. -A use case is the specific task or domain/sub-domain you wish to sparsify a model for such as image classification, object detection, or text classification. -It is used to enable Sparsify to apply the best sparsification techniques for your use case, to automatically package the model for deployment, and depending on what is run, to load specific pipelines for data loading and training. +A use case is the specific task or domain/sub-domain you wish to sparsify a model for, such as image classification, object detection, or text classification. +It is used to enable Sparsify to apply the best sparsification techniques for your use case, to automatically package the model for deployment, and, depending on what is run, to load specific pipelines for data loading and training. ## Use Cases From c3d3995bee130535fb3bef4095a292a27e26ec41 Mon Sep 17 00:00:00 2001 From: Jeannie Finks <74554921+jeanniefinks@users.noreply.github.com> Date: Wed, 12 Jul 2023 20:06:34 -0400 Subject: [PATCH 22/47] Update optim-levels-guide.md grammar nits --- docs/optim-levels-guide.md | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/docs/optim-levels-guide.md b/docs/optim-levels-guide.md index 8e3e062c..5f1fac5e 100644 --- a/docs/optim-levels-guide.md +++ b/docs/optim-levels-guide.md @@ -17,7 +17,7 @@ limitations under the License. 
# Sparsify Optim (Sparsification) Levels Guide When using Sparsify, the optim (sparsification) level is one of the top arguments you should decide on. -Specifically, it controls how much sparsification is applied to your model with higher values resulting in faster and more compressed models. +Specifically, it controls how much sparsification is applied to your model, with higher values resulting in faster and more compressed models. At the max range, though, you may see a drop in accuracy. The optim level can be set anywhere from 0.0 to 1.0, where 0.0 is for no sparsification and 1.0 is for maximum sparsification. 0.5 is the default optim level and is a good starting point for most use cases. @@ -25,13 +25,13 @@ The optim level can be set anywhere from 0.0 to 1.0, where 0.0 is for no sparsif ## Optim Level Values The general rule is that 0.0 is the baseline model, <0.3 only quantizes the model, and 0.3-1.0 increases the sparsity (unstructured/structured pruning) of the model and applies quantization. -The exact mappings of optim levels depends on the experiment type. +The exact mappings of optim levels depend on the experiment type. The current mappings for each experiment type are listed below. Note, these mappings are subject to change in future releases as we continue to improve Sparsify with new algorithms and capabilities. ### One-Shot Optim Levels -Given that one shot is applied in post-training, the sparsity ranges are lowered to avoid accuracy drops as compared with sparse transfer or training aware. +Given that One-Shot is applied in post-training, the sparsity ranges are lowered to avoid accuracy drops as compared with Sparse-Transfer or Training-Aware. The specific ranges are the following: - optim-level == 0.0: no sparsification is applied and the input model is returned as a baseline test case. @@ -41,15 +41,15 @@ The specific ranges are the following: The default of 0.5 will result in a ~50% sparse model with INT8 quantization. 
-### Sparse Transfer Optim Levels +### Sparse-Transfer Optim Levels -Sparse transfer mappings are a bit different from one shot and training aware since it maps to models available in the SparseZoo to transfer from. +Sparse-Transfer mappings are a bit different from One-Shot and Training-Aware since it maps to models available in the SparseZoo to transfer from. Increasing the optim level will result in smaller and more compressed models. The specific mappings are the following: - optim-level == 0.0: the largest model selected from the SparseZoo with no optimizations. - optim-level < 0.25: the largest model selected from the SparseZoo with INT8 quantization applied to the model (activations and weights). -- optim-level < 0.5: the largest model selected form the SparseZoo with both unstructured pruning (sparsity) and INT8 quantization applied to the model. +- optim-level < 0.5: the largest model selected from the SparseZoo with both unstructured pruning (sparsity) and INT8 quantization applied to the model. - optim-level < 0.75: the medium model selected from the SparseZoo with both unstructured pruning (sparsity) and INT8 quantization applied to the model. - optim-level <= 1.0: the smallest model selected from the SparseZoo with both unstructured pruning (sparsity) and INT8 quantization applied to the model. @@ -57,7 +57,7 @@ The default of 0.5 will result in a medium-sized sparse model with INT8 quantiza ### Training-Aware Optim Levels -Given that training aware is applied while training, the sparsity ranges are increased as compared to one shot since accuracy recovery is easier at higher sparsities. +Given that Training-Aware is applied while training, the sparsity ranges are increased as compared to One-Shot since accuracy recovery is easier at higher sparsities. The specific ranges are the following: - optim-level == 0.0: no sparsification is applied and the input model is returned as a baseline test case. 
From c457117a2d776c5ef0eb3d615245f858194a05f6 Mon Sep 17 00:00:00 2001 From: Jeannie Finks <74554921+jeanniefinks@users.noreply.github.com> Date: Wed, 12 Jul 2023 20:16:28 -0400 Subject: [PATCH 23/47] Update models-guide.md grammar nits --- docs/models-guide.md | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/docs/models-guide.md b/docs/models-guide.md index 93f5be86..1f8b9aff 100644 --- a/docs/models-guide.md +++ b/docs/models-guide.md @@ -17,18 +17,18 @@ limitations under the License. # Sparsify Models Guide For any Sparsify Experiments, a dense model can be supplied for sparsification. -One Shot is the only experiment type that requires a model to be passed in. +One-Shot is the only experiment type that requires a model to be passed in. For others, a default model will be chosen to best fit the given use case. -Due to the varied ML pipelines and implementations, Sparsify standardizes on a few, popular formats for models. +Due to the varied ML pipelines and implementations, Sparsify standardizes on a few popular formats for models. You will need to make sure that your models are formatted properly according to the standards listed below. -## One Shot +## One-Shot -The ONNX model format is the only currently supported one for one shot. -See the SparseML documentation for exporting to ONNX formats. -In the near future, more formats will be added for support with one shot. +The ONNX model format is the only currently supported format for One-Shot. +See the [SparseML documentation](https://docs.neuralmagic.com) for exporting to ONNX formats. +In the near future, more formats will be added for support with One-Shot. -## Training Aware and Sparse Transfer +## Training-Aware and Sparse-Transfer -The PyTorch model format is the only currently supported one for training aware and sparse transfer experiments. +The PyTorch model format is the only currently supported format for Training-Aware and Sparse-Transfer Experiments. 
The exact format will depend on the pipeline, and therefore the use case, for the experiment. From ddcb3f8a51350a84cd8e1e3591a2d5533586a074 Mon Sep 17 00:00:00 2001 From: Jeannie Finks <74554921+jeanniefinks@users.noreply.github.com> Date: Wed, 12 Jul 2023 20:22:48 -0400 Subject: [PATCH 24/47] Update datasets-guide.md grammar nits --- docs/datasets-guide.md | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/docs/datasets-guide.md b/docs/datasets-guide.md index 49e6a9f5..0e9d4028 100644 --- a/docs/datasets-guide.md +++ b/docs/datasets-guide.md @@ -17,14 +17,14 @@ limitations under the License. # Sparsify Datasets Guide For all Sparsify Experiments, you will need to provide a dataset to create a sparse model. -Due to the varied ML pipelines and implementations, Sparsify standardizes on a few, popular formats for datasets. +Due to the varied ML pipelines and implementations, Sparsify standardizes on a few popular formats for datasets. You will need to make sure that your data is formatted properly according to the standards listed below. ## Predefined Use Cases -### Training Aware and Sparse Transfer +### Training-Aware and Sparse-Transfer -Training Aware and Sparse Transfer utilize specific dataset standards depending on the use case. +Training-Aware and Sparse-Transfer utilize specific dataset standards depending on the use case. Each one is listed below with an example. #### Image Classification @@ -40,7 +40,7 @@ This format is fairly simple and intuitive, and it is also widely used in the ma - The images should be in a format readable by the Python Imaging Library (PIL), which includes formats such as .jpeg, .png, .bmp, etc. - Images do not need to be of the same size. -The PyTorch ImageFolder class automatically assigns numerical class labels to the images based on the lexicographical order of their class directories. 
+The PyTorch ImageFolder class automatically assigns numerical class labels to the images based on the lexicographical order of their class directories. Therefore, it is crucial to ensure the directories are properly named to avoid any confusion or mislabeling. ##### Example @@ -62,13 +62,13 @@ The exact filenames ('xxx.png', 'xxy.png', etc.) do not matter; what matters is By organizing the data in this way, it can be easily read and labeled by the PyTorch ImageFolder class, and thus easily used for training image classification models in Sparsify. -Please note, the class labels ('dog', 'cat') are case-sensitive and the order of the classes would be sorted lexicographically. -Here, 'cat' will be considered class 0 and 'dog' will be class 1, due to alphabetical order. +Note, the class labels ('dog', 'cat') are case-sensitive and the order of the classes would be sorted lexicographically. +Here, 'cat' will be considered class 0, and 'dog' will be class 1, due to alphabetical order. #### Object Detection / Image Segmentation For object detection and image segmentation tasks, Sparsify supports the dataset format used by YOLOv5. -This format is specifically designed for tasks involving bounding boxes and segmentation masks, and is widely adopted in the community. +This format is specifically designed for tasks involving bounding boxes and segmentation masks and is widely adopted in the community. 
##### Specifications @@ -123,7 +123,7 @@ Hugging Face datasets can be represented in various file formats including JSON, ##### Example -Here's an example of how you might structure a dataset for a sentiment analysis task: +Here is an example of how you might structure a dataset for a sentiment analysis task: If you're using a JSON lines (.jsonl) format, your file could look like this: @@ -149,11 +149,11 @@ The first row contains the column names, and each subsequent row represents a si Whether you choose to use JSON lines or CSV will depend on your specific needs and preferences, but either format will work well with Hugging Face and Sparsify. Make sure your data is formatted correctly according to these specifications to ensure it can be used in your experiments. -### One Shot +### One-Shot -For one-shot experiments, Sparsify utilizes the `.npz` format for data storage, which is a file format based on the popular NumPy library. +For One-Shot Experiments, Sparsify utilizes the `.npz` format for data storage, which is a file format based on the popular NumPy library. This format is efficient and versatile. -In the near future, more functionality will be landed such that the definitions given above for Training Aware and Sparse Transfer will work as well. +In the near future, more functionality will be landed such that the definitions given above for Training-Aware and Sparse-Transfer will work as well. #### Specifications @@ -238,7 +238,7 @@ for data in YOUR_DATA_LOADER: model.save() ``` -Note: Replace YOUR_MODEL and YOUR_DATA_LOADER with your PyTorch model and data loader, respectively. +Note: Replace `YOUR_MODEL` and `YOUR_DATA_LOADER` with your PyTorch model and data loader, respectively. 
## Custom Use Cases From 34dedf40e6e134d34b154fdd7ac75511a3772b48 Mon Sep 17 00:00:00 2001 From: Jeannie Finks <74554921+jeanniefinks@users.noreply.github.com> Date: Wed, 12 Jul 2023 20:51:08 -0400 Subject: [PATCH 25/47] Update cloud-user-guide.md grammar nits --- docs/cloud-user-guide.md | 37 ++++++++++++++++++------------------- 1 file changed, 18 insertions(+), 19 deletions(-) diff --git a/docs/cloud-user-guide.md b/docs/cloud-user-guide.md index a4d12503..3ba275c8 100644 --- a/docs/cloud-user-guide.md +++ b/docs/cloud-user-guide.md @@ -22,24 +22,23 @@ The Sparsify Cloud is a web application that allows you to create and manage Spa In this Sparsify Cloud User Guide, we will show you how to: 1. Create a Neural Magic Account. 2. Install Sparsify in your local training environment. -3. Login utilizing your API key. -4. Run an Experiment +3. Log in using your API key. +4. Run an Experiment. 5. Compare the Experiment results. -## Create a Neural Magic Account +## Creating a Neural Magic Account Creating a new account is simple and free. An account is required to manage your Experiments and API keys. -Visit the [Neural Magic's Web App Platform](https://account.neuralmagic.com/signup) and create an account by entering your email, name, and a unique password. +Visit the [Neural Magic's Web App Platform](https://account.neuralmagic.com/signup) and create an account by entering your email, name, and unique password. If you already have a Neural Magic Account, [sign in](https://account.neuralmagic.com/signin) with your email. [![SignIn](https://drive.google.com/uc?id=1RInSrLsfm0PQLEkjJqD1HzaCWA2yDcNi)](https://drive.google.com/uc?id=1RInSrLsfm0PQLEkjJqD1HzaCWA2yDcNi) -## Install Sparsify in your local training environment +## Installing Sparsify in Your Local Training Environment -Next, you'll need to install Sparsify on your training hardware. 
-To do this, run the following command: +Next, install Sparsify on your training hardware by running the following command: ```bash pip install sparsify-nightly @@ -47,41 +46,41 @@ pip install sparsify-nightly For more details and system/hardware requirements, see the [Installation](https://github.com/neuralmagic/sparsify/blob/main/README.md#installation) section. -You may copy the command from the Sparsify Cloud in step 1 and run that in your training environment to install Sparsify. +You may copy the command from the Sparsify Cloud in Step 1 in the following screenshot and run that in your training environment to install Sparsify. [![Homepage](https://drive.google.com/uc?id=10U3r7lr4fmdKLG_xzRys2avdf2g2GVIN)](https://drive.google.com/uc?id=10U3r7lr4fmdKLG_xzRys2avdf2g2GVIN) -## Login utilizing your API key +## Log in Utilizing Your API key With Sparsify installed on your training hardware, you'll need to authorize the local CLI to access your account. This is done by running the `sparsify.login` command and providing your API key. -Locate your API key on the home page of the [Sparsify Cloud](https://apps.neuralmagic.com/sparsify/) under the **'Get set up'** modal. +Locate your API key on the homepage of the [Sparsify Cloud](https://apps.neuralmagic.com/sparsify/) under the **'Get set up'** modal. Once you have located this, copy the command or the API key itself and run the following command: ```bash sparsify.login API_KEY ```` -You may copy the command from the Sparsify Cloud in step 2 and run that in your training environment after installing Sparsify to log in via the Sparsify CLI. For more details on the `sparsify.login` command, see the [CLI/API Guide](https://github.com/neuralmagic/sparsify/blob/main/docs/cli-api-guide.md). +You may copy the command from the Sparsify Cloud in Step 2 and run that in your training environment after installing Sparsify to log in via the Sparsify CLI. 
For more details on the `sparsify.login` command, see the [CLI/API Guide](https://github.com/neuralmagic/sparsify/blob/main/docs/cli-api-guide.md). -## Run an Experiment +## Running an Experiment Experiments are the core of sparsifying a model. They are the process of applying sparsification algorithms in One-Shot, Training-Aware, or Sparse-Transfer to a dataset and model. -All Experiments are run locally on your training hardware and can be synced with the cloud for further analysis and comparison. +All Experiments are run locally on your training hardware and can be synced with Sparsify Cloud for further analysis and comparison. -To run an Experiment, use the Sparsify Cloud to generate a code command to run in your training environment.: +To run an Experiment, use the Sparsify Cloud to generate a code command to run in your training environment: 1. Click on 'Start Sparsifyng' in the top right corner of the Sparsify Cloud Homepage to bring up the ```Sparsify a model``` modal. ![Sparsify a model](https://drive.google.com/uc?id=1FyayVSqq5YtKO_dEgt5iMNSZQNsqaQFq) -3. Select a Use Case for your model. Note that if your use case is not present in the dropdown, fear not; the use case does not affect the optimization of the model. +3. Select a use case for your model. Note that if your use case is not present in the dropdown, fear not; the use case does not affect the optimization of the model. 4. Choose the Experiment Type. To learn more about the Experiments, see the [Sparsify README](https://github.com/neuralmagic/sparsify/blob/main/README.md#run-an-experiment). -5. Adjust the Hyperparameter Compression slider to designate whether you would like to to optimize the model for performance, accuracy, or a balance of both. Note that selecting an extreme on the slider will not completely tank the opposing metric. -6. Click 'Generate Code Snippet' to view the code snipppet generated from your sparsification selections on the next modal. +5. 
Adjust the Hyperparameter Compression slider to designate whether you would like to optimize the model for performance, accuracy, or a balance of both. Note that selecting an extreme on the slider will not completely tank the opposing metric. +6. Click 'Generate Code Snippet' to view the code snippet generated from your sparsification selections on the next modal. ![Generate Code Snippetl](https://drive.google.com/uc?id=14B193hHeYqLeSX8r6C5N1G8beBeXUkYE) 7. Once your code snippet is generated, make sure you have installed Sparsify and are logged in via the CLI. @@ -93,10 +92,10 @@ To run an Experiment, use the Sparsify Cloud to generate a code command to run i To learn more about the arguments for the `sparsify.run` command, see the [CLI/API Guide](https://github.com/neuralmagic/sparsify/blob/main/docs/cli-api-guide.md). -## Compare the Experiment results +## Comparing the Experiment Results Once you have run your Experiment, you can compare the results printed out to the console using the `deepsparse.benchmark` command. -In the near future, you will be able to compare the results in the Cloud, measure other scenarios, and compare the results to other Experiments. +In the near future, you will be able to compare the results in Sparsify Cloud, measure other scenarios, and compare the results to other Experiments. To compare the results of your Experiment with the original dense baseline model, you can use the `deepsparse.benchmark` command with your original model and the new optimized model on your deployment hardware. Models that have been optimized using Sparsify will generally run performantly on DeepSparse, Neural Magic's sparsity-aware CPU inference runtime. 
From e543448fc7ae5bffedf8154a4af09e436dc2f78c Mon Sep 17 00:00:00 2001 From: Jeannie Finks <74554921+jeanniefinks@users.noreply.github.com> Date: Wed, 12 Jul 2023 20:58:45 -0400 Subject: [PATCH 26/47] Update cli-api-guide.md grammar nits --- docs/cli-api-guide.md | 29 ++++++++++++++--------------- 1 file changed, 14 insertions(+), 15 deletions(-) diff --git a/docs/cli-api-guide.md b/docs/cli-api-guide.md index 1f9e9cdb..41d12621 100644 --- a/docs/cli-api-guide.md +++ b/docs/cli-api-guide.md @@ -19,10 +19,9 @@ limitations under the License. The Sparsify CLI/API is a Python package that allows you to run Sparsify Experiments locally, sync with the Sparsify Cloud, and integrate into your own workflows. -## Install Sparsify +## Installing Sparsify -Next, you'll need to install Sparsify on your training hardware. -To do this, run the following command: +Next, install Sparsify on your training hardware by running the following command: ```bash pip install sparsify-nightly @@ -30,9 +29,9 @@ pip install sparsify-nightly For more details and system/hardware requirements, see the [Installation](https://github.com/neuralmagic/sparsify#installation) section. -## Login to Sparsify +## Logging in to Sparsify -With Sparsify installed on your training hardware, you'll need to authorize the local CLI to access your account. +With Sparsify installed on your training hardware, you will need to authorize the local CLI to access your account. This is done by running the `sparsify.login` command and providing your API key. Locate your API key on the home page of the [Sparsify Cloud](https://apps.neuralmagic.com/sparsify) under the **'Get set up'** modal. 
Once you have located this, copy the command or the API key itself and run the following command: @@ -41,19 +40,19 @@ Once you have located this, copy the command or the API key itself and run the f sparsify.login API_KEY ```` -The `sparsify.login API_KEY` command is used to sync your local training environment with the Sparsify Cloud in order to keep track of your Experiments. Once you run the `sparsify.login API_KEY` command, you should see a confirmation via the console that you are logged into Sparsify. To log out of Sparsify, use the `exit` command. +The `sparsify.login API_KEY` command is used to sync your local training environment with the Sparsify Cloud in order to keep track of your Experiments. Once you run the `sparsify.login API_KEY` command, you should see a confirmation via the console that you are logged in to Sparsify. To log out of Sparsify, use the `exit` command. If you encounter any issues with your API key, reach out to the team via the [nm-sparsify Slack Channel](https://join.slack.com/t/discuss-neuralmagic/shared_invite/zt-1xkdlzwv9-2rvS6yQcCs7VDNUcWxctnw), [email](mailto::rob@neuralmagic.com) or via [GitHub Issues](https://github.com/neuralmagic/sparsify/issues). -## Run an Experiment +## Running an Experiment Experiments are the core of sparsifying a model. They are the process of applying sparsification algorithms in One-Shot, Training-Aware, or Sparse-Transfer to a dataset and model. -All Experiments are run locally on your training hardware and can be synced with the cloud for further analysis and comparison. +All Experiments are run locally on your training hardware and can be synced with Sparsify Cloud for further analysis and comparison. -To run an Experiment, you can use either the CLI or the API depending on your use case. +To run an Experiment, you can use either the CLI or the API, depending on your use case. 
The Sparsify Cloud provides a UI for exploring hyperparameters, predicting performance, and generating the desired CLI/API command. The general command for running an Experiment is: @@ -65,11 +64,11 @@ sparsify.run EXPERIMENT_TYPE --use-case USE_CASE --model MODEL --data DATA --opt Where the values for each of the arguments follow these general rules: - EXPERIMENT_TYPE: one of `one-shot`, `training-aware`, or `sparse-transfer`. -- USE_CASE: the use case you're solving for such as `image-classification`, `object-detection`, `text-classification`, a custom use case, etc. A full list of supported use cases for each Experiment type can be found [here](https://github.com/neuralmagic/sparsify/blob/main/docs/use-cases-guide.md). +- USE_CASE: the use case you're solving for, such as `image-classification`, `object-detection`, `text-classification`, a custom use case, etc. A full list of supported use cases for each Experiment type can be found [here](https://github.com/neuralmagic/sparsify/blob/main/docs/use-cases-guide.md). -- MODEL: the model you want to sparsify which can be a model name such as `resnet50`, a stub from the [SparseZoo](https://sparsezoo.neuralmagic.com), or a path to a local model. For One-Shot, currently the model must be in an ONNX format. For Training-Aware and Sparse-Transfer, the model must be in a PyTorch format. More details on model formats can be found [here](https://github.com/neuralmagic/sparsify/blob/main/docs/models-guide.md). +- MODEL: the model you want to sparsify which can be a model name such as `resnet50`, a stub from the [SparseZoo](https://sparsezoo.neuralmagic.com), or a path to a local model. For One-Shot, currently, the model must be in an ONNX format. For Training-Aware and Sparse-Transfer, the model must be in a PyTorch format. More details on model formats can be found [here](https://github.com/neuralmagic/sparsify/blob/main/docs/models-guide.md). -- DATA: the dataset you want to use to the sparsify the model. 
This can be a dataset name such as `imagenette` or a path to a local dataset. Currently, One-Shot only supports NPZ formatted datasets. Training-Aware and Sparse-Transfer support PyTorch ImageFolder datasets for image classification, YOLOv5/v8 datasets for object detection and segmentation, and HuggingFace datasets for NLP/NLG. More details on dataset formats can be found [here](https://github.com/neuralmagic/sparsify/blob/main/docs/datasets-guide.md). +- DATA: the dataset you want to use to sparsify the model. This can be a dataset name such as `imagenette` or a path to a local dataset. Currently, One-Shot only supports NPZ formatted datasets. Training-Aware and Sparse-Transfer support PyTorch ImageFolder datasets for image classification, YOLOv5/v8 datasets for object detection and segmentation, and Hugging Face datasets for NLP/NLG. More details on dataset formats can be found [here](https://github.com/neuralmagic/sparsify/blob/main/docs/datasets-guide.md). - OPTIM_LEVEL: the desired sparsification level from 0 (none) to 1 (max). The general rule is that 0 is the baseline model, <0.3 only quantizes the model, 0.3-1.0 increases the sparsity of the model and applies quantization. More details on sparsification levels can be found [here](https://github.com/neuralmagic/sparsify/blob/main/docs/optim-levels-guide.md). @@ -82,10 +81,10 @@ Where the values for each of the arguments follow these general rules: | **++** | **+++++** | **+++** | One-Shot Experiments are the quickest way to create a faster and smaller version of your model. -The algorithms are applied to the model post training utilizing a calibration dataset, so they result in no further training time and much faster sparsification times compared with Training-Aware Experiments. +The algorithms are applied to the model post-training, utilizing a calibration dataset, so they result in no further training time and much faster sparsification times compared with Training-Aware Experiments. 
Generally, One-Shot Experiments result in a 3-5x speedup with minimal accuracy loss. -They are ideal for when you want to quickly sparsify your model and don't have a lot of time to spend on the sparsification process. +They are ideal for when you want to quickly sparsify your model and have limited time to spend on the sparsification process. CV Example: ```bash @@ -149,7 +148,7 @@ sparsify.run training-aware --use-case text_classification --model bert-base --d Landing Soon! -## Compare the Experiment results +## Comparing the Experiment results Once you have run your Experiment, you can compare the results printed out to the console using the `deepsparse.benchmark` command. In the near future, you will be able to compare the results in the Cloud, measure other scenarios, and compare the results to other Experiments. From d6a12cdd1da7c4ca950adf52636e34fb12d42102 Mon Sep 17 00:00:00 2001 From: Jeannie Finks <74554921+jeanniefinks@users.noreply.github.com> Date: Wed, 12 Jul 2023 21:05:06 -0400 Subject: [PATCH 27/47] Update use-cases-guide.md grammar --- docs/use-cases-guide.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/use-cases-guide.md b/docs/use-cases-guide.md index 2a55a132..32205c36 100644 --- a/docs/use-cases-guide.md +++ b/docs/use-cases-guide.md @@ -52,6 +52,6 @@ To utilize, run a One-Shot Experiment with `--use-case` set to the desired custo ### Training-Aware For Training-Aware Experiments, custom use cases are only supported with the APIs for custom integrations. -This is because non-custom integrations utilize plug-ins that correspond to the appropriate use case for training pipelines. +This is because non-custom integrations utilize plugins that correspond to the appropriate use case for training pipelines. To utilize this, ensure that you have a training pipeline ready to go and inject the Sparsify API into the training pipeline with the desired use case passed in as an argument. 
More info on this specific pathway will be available in the near future as Sparsify development progresses. From 50211983c5c8222d8d4b11a66d6e2dc0867bfbec Mon Sep 17 00:00:00 2001 From: Jeannie Finks <74554921+jeanniefinks@users.noreply.github.com> Date: Wed, 12 Jul 2023 21:46:24 -0400 Subject: [PATCH 28/47] Update use-cases-guide.md grammar nits --- docs/use-cases-guide.md | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/docs/use-cases-guide.md b/docs/use-cases-guide.md index 32205c36..b234e039 100644 --- a/docs/use-cases-guide.md +++ b/docs/use-cases-guide.md @@ -38,11 +38,11 @@ Sparsify will automatically recognize these aliases and apply the correct use ca ### Custom Use Cases If you wish to use Sparsify for a use case that is not in the list of currently supported use cases, you can use a custom use case for some pathways in Sparsify. -The custom use cases will be saved into the Sparsify Cloud for future reuse when run through a supported pathway. +The custom use cases will be saved in Sparsify Cloud for future reuse when run through a supported pathway. The pathways that support custom use cases are listed below. -Note, custom use cases will prevent Sparsify from applying known, domain-specific knowledge for the sparsification of your model. -Additionally, it will prevent autofill of the pre- and post-processing functions when creating a deployment package. +Note that custom use cases will prevent Sparsify from applying known, domain-specific knowledge for the sparsification of your model. +Additionally, it will prevent auto-filling of the pre- and post-processing functions when creating a deployment package. #### One-Shot @@ -54,4 +54,4 @@ To utilize, run a One-Shot Experiment with `--use-case` set to the desired custo For Training-Aware Experiments, custom use cases are only supported with the APIs for custom integrations. 
This is because non-custom integrations utilize plugins that correspond to the appropriate use case for training pipelines. To utilize this, ensure that you have a training pipeline ready to go and inject the Sparsify API into the training pipeline with the desired use case passed in as an argument. -More info on this specific pathway will be available in the near future as Sparsify development progresses. +More information on this specific pathway will be available in the near future as Sparsify's development progresses. From bcd736ea43bf5349d2fc50e63fa2e13435df059d Mon Sep 17 00:00:00 2001 From: Dipika Sikka Date: Thu, 13 Jul 2023 09:33:18 -0400 Subject: [PATCH 29/47] Add functionality to build yolov5 yaml (#260) * add initial functionality to build a yaml file for yolo workflows if the user provides a local directory to the data arg * finish code for yaml generation * typo fix * format fix * move function to helpers; clean-up function --- .../tasks/object_detection/yolov5/runner.py | 5 +- src/sparsify/auto/utils/helpers.py | 97 +++++++++++++++++++ 2 files changed, 100 insertions(+), 2 deletions(-) diff --git a/src/sparsify/auto/tasks/object_detection/yolov5/runner.py b/src/sparsify/auto/tasks/object_detection/yolov5/runner.py index bb29f067..5f1784c8 100644 --- a/src/sparsify/auto/tasks/object_detection/yolov5/runner.py +++ b/src/sparsify/auto/tasks/object_detection/yolov5/runner.py @@ -27,7 +27,7 @@ from sparseml.yolov5.scripts import train as train_hook from sparsify.auto.tasks.object_detection.yolov5 import Yolov5ExportArgs from sparsify.auto.tasks.runner import DDP_ENABLED, TaskRunner -from sparsify.auto.utils import HardwareSpecs +from sparsify.auto.utils import HardwareSpecs, create_yolo_data_yaml from sparsify.schemas import Metrics, SparsificationTrainingConfig from sparsify.utils import TASK_REGISTRY from yolov5.models.experimental import attempt_load @@ -80,11 +80,12 @@ def config_to_args( :param config: training config to generate run for :return: 
tuple of training and export arguments """ + dataset = create_yolo_data_yaml(config.dataset) train_args = Yolov5TrainArgs( weights=config.base_model, recipe=config.recipe, recipe_args=config.recipe_args, - data=config.dataset, + data=dataset, **config.kwargs, ) diff --git a/src/sparsify/auto/utils/helpers.py b/src/sparsify/auto/utils/helpers.py index 49b1d22d..87a516f4 100644 --- a/src/sparsify/auto/utils/helpers.py +++ b/src/sparsify/auto/utils/helpers.py @@ -17,6 +17,7 @@ """ import logging import os +import re from collections import OrderedDict from datetime import datetime from typing import Any, Dict, List, Tuple, Union @@ -32,6 +33,7 @@ "load_raw_config_history", "best_n_trials_from_history", "initialize_banner_logger", + "create_yolo_data_yaml", ] SAVE_DIR = "{{run_mode}}_{{task}}{:_%Y_%m_%d_%H_%M_%S}".format(datetime.now()) @@ -47,6 +49,101 @@ def initialize_banner_logger(): logger.addHandler(handler) +def create_yolo_data_yaml(dataset: str) -> str: + """ + Check if the dataset provided is a data directory. If it is, buid a yolov5 yaml + file based on the provided data directory path. An example of the directory + structure for the provided directory path is shown below. There must + subdirectories in the provided directory named `images`, `labels` and a text + file called `classes.txt` which includes the list of the classes for the + particular dataset, ordered by class id. The `images` and `labels` folders + should contain identically named train, test, and validation data folder. + For details on what images and labels should look like, please see the yolov5 + repository: https://github.com/ultralytics/yolov5/tree/master. + + Example directory structure: + - data_for_training/ + - labels/ + - train/ + - val/ + - test/ + - images/ + - train/ + - val/ + - test/ + - classes.txt + + :params dataset: inputted data string arg. Assumed to either be a dataset which + can be downloaded publicly or a locally available directory containing + data files. 
+ + :returns: path to yaml to download or the newly built yaml. If the data string + arg is a yaml for a publicly available dataset, this function will return the + same string. Otherwise, the path to the newly generated yaml will be returned. + """ + data_file_args = {} + image_dir = "images" + class_path = "classes.txt" + yaml_path = "data_local.yaml" + + def _check_and_update_file(file_type: str, path: str): + if data_file_args.get(file_type, None): + data_file_args[file_type].append(path) + else: + data_file_args[file_type] = [path] + + if not os.path.isdir(dataset): + return dataset + + image_path = os.path.join(dataset, image_dir) + class_list_path = os.path.join(dataset, class_path) + + if not os.path.exists(image_path): + raise ValueError( + f"The the provided directory path {dataset} " + "does not contain a folder called `images`. A subdirectory must " + "exist which contains the data folders." + ) + + if not os.path.exists(class_list_path): + raise ValueError( + f"The the provided directory path {dataset} " + "does not contain a classes.txt file. A file must be " + "present which includes a list of the classes for the dataset." + ) + + data_file_args["path"] = dataset + + for d in os.listdir(image_path): + current_path = os.path.join(image_dir, d) + if re.search(r"train", d): + _check_and_update_file("train", current_path) + elif re.search(r"val", d): + _check_and_update_file("val", current_path) + elif re.search(r"test", d): + _check_and_update_file("test", current_path) + + if not (data_file_args.get("train") and data_file_args.get("val")): + raise Exception( + "No training or validation folders found. Be sure the " + "directory provided to the data arg contains folders " + "with the train and val substrings in the filenames." 
+ ) + + # Store the newly generated yaml in the same directory as the data + dataset = os.path.join(dataset, yaml_path) + + with open(class_list_path, "r") as f: + class_list = f.readlines() + + classes = {idx: label.strip() for idx, label in enumerate(class_list)} + + with open(dataset, "w") as f: + yaml.safe_dump({**data_file_args, "names": classes}, f, sort_keys=False) + + return dataset + + def create_save_directory(api_args: "APIArgs") -> Tuple[str]: # noqa: F821 """ Create base save directory structure for a single sparsify.auto run From 49f3ea07636e4e56ab34110eb187b3a47170213d Mon Sep 17 00:00:00 2001 From: Benjamin Fineran Date: Thu, 13 Jul 2023 09:42:35 -0400 Subject: [PATCH 30/47] update requried json lines extension in datasets-guide.md (#261) --- docs/datasets-guide.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/datasets-guide.md b/docs/datasets-guide.md index 0e9d4028..a1f1f466 100644 --- a/docs/datasets-guide.md +++ b/docs/datasets-guide.md @@ -111,7 +111,7 @@ Make sure the class labels are consistent with what is expected by the YOLOv5 co #### Natural Language (NLP/NLG) For natural language processing (NLP) and natural language generation (NLG) tasks, Sparsify supports the dataset formats used by the Hugging Face library. -Hugging Face datasets can be represented in various file formats including JSON, CSV, and JSON lines format (.jsonl). +Hugging Face datasets can be represented in various file formats including JSON, CSV, and JSON lines format (.json). 
##### Specifications @@ -125,7 +125,7 @@ Hugging Face datasets can be represented in various file formats including JSON, Here is an example of how you might structure a dataset for a sentiment analysis task: -If you're using a JSON lines (.jsonl) format, your file could look like this: +If you're using a JSON lines (.json) format, your file could look like this: ``` {"text": "I love this movie!", "label": "positive"} From 2ffc40f76140d252b1f5e92c6c64aeca0ea55177 Mon Sep 17 00:00:00 2001 From: Rob Greenberg <100797996+rgreenberg1@users.noreply.github.com> Date: Thu, 13 Jul 2023 12:20:16 -0400 Subject: [PATCH 31/47] Update README.md - quickstart turning into 1 word update (#259) * Update README.md - quickstart updates * Update cloud-user-guide.md - fixed UI images to reflect new UI * Create one-shot_experiment-guide * refactor readme * Rename one-shot_experiment-guide to one-shot_experiment-guide.md * Update one-shot_experiment-guide.md - Updated * Update one-shot_experiment-guide.md Grammar nits * Create sparse-transfer-experiment-guide.md * Update sparse-transfer-experiment-guide.md * Update sparse-transfer-experiment-guide.md * Create training-aware_experiment-guide.md * Update sparse-transfer-experiment-guide.md grammar nits * Update training-aware_experiment-guide.md grammar nits * Update sparse-transfer-experiment-guide.md grammar nits * Update training-aware_experiment-guide.md grammar nits * Update sparse-transfer-experiment-guide.md grammar nits * Update one-shot_experiment-guide.md grammar nits * Update one-shot_experiment-guide.md - small grammar change * adjust sparse transfer and training aware locations * Update README.md - added sign in UI, linked experiment guides * Update README.md - re-scaled image * Update one-shot_experiment-guide.md - removed Optim Level guide * Update sparse-transfer-experiment-guide.md - removed optim_level guide * Update training-aware_experiment-guide.md- removed optim-level guide * Update README.md - linked in companion 
guides * Update README.md - changed optim_level to 0.5 * Delete optim-levels-guide.md * Update one-shot_experiment-guide.md - Tutorial pathway note * Update sparse-transfer-experiment-guide.md grammar nit * Update training-aware_experiment-guide.md grammar nits * Update cloud-user-guide.md * Delete cli-api-guide.md --------- Co-authored-by: Mark Kurtz Co-authored-by: Jeannie Finks <74554921+jeanniefinks@users.noreply.github.com> --- README.md | 415 ++++++++++++----------- docs/cli-api-guide.md | 198 ----------- docs/cloud-user-guide.md | 1 - docs/one-shot_experiment-guide.md | 251 ++++++++++++++ docs/optim-levels-guide.md | 68 ---- docs/sparse-transfer-experiment-guide.md | 301 ++++++++++++++++ docs/training-aware_experiment-guide.md | 302 +++++++++++++++++ 7 files changed, 1068 insertions(+), 468 deletions(-) delete mode 100644 docs/cli-api-guide.md create mode 100644 docs/one-shot_experiment-guide.md delete mode 100644 docs/optim-levels-guide.md create mode 100644 docs/sparse-transfer-experiment-guide.md create mode 100644 docs/training-aware_experiment-guide.md diff --git a/README.md b/README.md index a65a0515..4e19c47a 100644 --- a/README.md +++ b/README.md @@ -15,22 +15,6 @@ See the License for the specific language governing permissions and limitations under the License. --> - -

Sparsify tool icon  Sparsify [Alpha]

ML model optimization product to accelerate inference

@@ -40,285 +24,284 @@ limitations under the License. Documentation
- + - + - Main + Main - GitHub release + GitHub release Stability - GitHub + GitHub - Contributor Covenant + Contributor Covenant - + - + - +

-![Logo](https://drive.google.com/uc?id=1XnlBKpRQdsnLC4IPoiCoihXJNFh8y7OL) - -# Welcome to the Sparsify Alpha Quick Start +**🚨 July 2023: Sparsify's next generation is now in alpha as of version 1.6.0!** -## Introduction -🚨 **July 2023: Sparsify's next generation is now in alpha as of version 1.6.0!** +Sparsify enables you to accelerate inference without sacrificing accuracy by applying state-of-the-art pruning, quantization, and distillation algorithms to neural networks with a simple web application and one-command API calls. -This quick start provides a brief overview of Sparsify and then details several pathways you can work through. We encourage you to explore each for Sparsify's full benefits. As of this update, support for [Sparsify's first generation](https://docs.neuralmagic.com/sparsify) has been deprecated. We highly recommend you try the alpha to get a sneak peek and influence the product's development process. +Sparsify empowers you to compress models through two components: +- **[Sparsify Cloud](https://apps.neuralmagic.com/sparsify/)** - a web application that allows you to create and manage Sparsify Experiments, explore hyperparameters, predict performance, and compare results across both Experiments and deployment scenarios. +- **Sparsify CLI/API** - a Python package and GitHub repository that allows you to run Sparsify Experiments locally, sync with the Sparsify Cloud, and integrate them into your workflows. -🚨 **Sparsify Alpha Feedback and Support** +## Table of Contents -Report UI issues and CLI errors, submit bug reports, and provide general feedback about the product to the team via the [nm-sparsify Slack Channel](https://join.slack.com/t/discuss-neuralmagic/shared_invite/zt-1xkdlzwv9-2rvS6yQcCs7VDNUcWxctnw), or via [GitHub Issues](https://github.com/neuralmagic/sparsify/issues). Alpha support is provided through those channels. 
+- [Quickstart Guide](#quickstart-guide) + - [Install and Setup](#1-install-and-setup) + - [Run an Experiment](#2-run-an-experiment) + - [Compare Results](#3-compare-results) + - [Deploy a Model](#4-deploy-a-model) +- [Companion Guides](#companion-guides) +- [Resources](#resources) -🚨 **Sparsify Alpha Terms and Conditions** +## Quickstart Guide -Sparsify Alpha is a pre-release version of Sparsify that is still in active development. The product is not yet ready for production use; APIs and UIs are subject to change. There may be bugs in the Alpha version, which we hope to have fixed before Beta and then a general Q3 2023 release. The feedback you provide on quality and usability helps us identify issues, fix them, and make Sparsify even better. This information is used internally by Neural Magic solely for that purpose. It is not shared or used in any other way. + +Interested in test-driving our alpha? +Get a sneak peek and influence the product's development process. +Thank you in advance for your feedback and interest! + -That being said, we are excited to share this release and hear what you think. Thank you in advance for your feedback and interest! +This quickstart details several pathways you can work through. +We encourage you to explore one for Sparsify's full benefits. +When you finish the quickstart, sparsifying your models is as easy as: -## Overview - -Sparsify enables you to accelerate inference without sacrificing accuracy by applying state-of-the-art pruning, quantization, and distillation algorithms to neural networks with a simple web application and one-command API calls. - -Sparsify empowers you to compress models through two components: - -* Sparsify Cloud - a web application that allows you to create and manage Sparsify Experiments, explore hyperparameters, predict performance, and compare results across both Experiments and deployment scenarios. 
-* Sparsify CLI/API - a Python package that allows you to run Sparsify Experiments locally, sync with the Sparsify Cloud, and integrate them into your own workflows. +```bash +sparsify.run sparse-transfer --use-case image-classification --data imagenette --optim-level 0.5 +``` -In this quick start, you will: +### 1. Install and Setup -1. [Verify prerequisites](#step-1-prerequisites). -2. [Create an account](#step-2-account-creation) a Neural Magic Account. -3. [Install](#step-3-installation) Sparsify in your local training environment. -4. [Log in](#step-4-sparsify-login) utilizing your API key. -5. [Run an Experiment](#step-5-running-experiments). - * [Experiments Overview](#experiments-overview) - * [One-Shot Experiments](#one-shot-experiments) - * [Sparse-Transfer Experiments](#sparse-transfer-experiments) - * [Training-Aware Experiments](#training-aware-experiments) - * [Command Syntax and Argument Guides](#command-syntax-and-arguments) - * `EXPERIMENT_TYPE` - * `USE_CASE` [guide](https://github.com/neuralmagic/sparsify/blob/main/docs/use-cases-guide.md) - * `MODEL` [guide](https://github.com/neuralmagic/sparsify/blob/main/docs/models-guide.md) - * `DATA`[guide](https://github.com/neuralmagic/sparsify/blob/main/docs/datasets-guide.md) - * `OPTIM_LEVEL` [guide](https://github.com/neuralmagic/sparsify/blob/main/docs/optim-levels-guide.md) - * [Example Commands by Experiment Type](#example-commands-by-experiment-type) - * [Running One-Shot Experiments](#running-one-shot-experiments) - * [Running Sparse-Transfer Experiments](#running-sparse-transfer-experiments) - * [Running Training-Aware Experiments](#running-training-aware-experiments) -7. [Compare](#step-6-comparing-experiment-results) the Experiment results. -8. [Deploy optimized models](#step-7-deploying-your-model-with-deepsparse) with DeepSpare (optional). 
+#### 1.1 Verify Prerequisites -When you finish this quick start, sparsifying your models is as easy as: +First, verify that you have the correct software and hardware to run the Sparsify Alpha. -```bash -sparsify.run sparse-transfer --use-case image-classification --data imagenette --optim-level 0.5 --train-kwargs '{"dataset": "imagenette"}' +
+Software -``` -## Step 1: Prerequisites -First, verify that you have the correct software and hardware to run the Sparsify Alpha. +Sparsify is tested on Python 3.8 and 3.10, ONNX 1.5.0-1.12.0, ONNX opset version 11+, and manylinux compliant systems. +Sparsify is not supported natively on Windows and MAC OS. +
-**Software:** Sparsify is tested on Python 3.8 and 3.10, ONNX 1.5.0-1.12.0, ONNX opset version 11+, and manylinux compliant systems. Sparsify is not supported natively on Windows and MAC OS. +
+Hardware -**Hardware:** Sparsify requires a GPU with CUDA + CuDNN in order to sparsify neural networks. -We recommend you use a Linux system with a GPU that has a minimum of 16GB of GPU Memory, 128GB of RAM, 4 CPU cores, and is CUDA-enabled. If you are sparsifying a very large model, you may need more RAM than the recommended 128GB. +Sparsify requires a GPU with CUDA + CuDNN in order to sparsify neural networks. +We recommend you use a Linux system with a GPU that has a minimum of 16GB of GPU Memory, 128GB of RAM, 4 CPU cores, and is CUDA-enabled. +If you are sparsifying a very large model, you may need more RAM than the recommended 128GB. If you encounter issues setting up your training environment, [file a GitHub issue](https://github.com/neuralmagic/sparsify/issues). +
-## Step 2: Account Creation +#### 1.2 Create an Account -Creating a new one-time account is simple and free. An account is required to manage your Experiments and API keys. -Visit the [Neural Magic's Web App Platform](https://account.neuralmagic.com/signup) and create an account by entering your email, name, and unique password. If you already have a Neural Magic Account, [sign in](https://account.neuralmagic.com/signin) with your email. +Creating a new one-time account is simple and free. +An account is required to manage your Experiments and API keys. +Visit the [Neural Magic's Web App Platform](https://account.neuralmagic.com/signup) and create an account by entering your email, name, and unique password. +If you already have a Neural Magic Account, [sign in](https://account.neuralmagic.com/signin) with your email. -See the [Sparsify Cloud User Guide](https://github.com/neuralmagic/sparsify/blob/main/docs/cloud-user-guide.md) for more details. +Sparsify Sign In -## Step 3: Installation +#### 1.3 Install Sparsify -`pip` is the preferred method for installing Sparsify. It is advised to create a fresh [virtual environment](https://docs.python.org/3/library/venv.html) to avoid dependency issues. +`pip` is the preferred method for installing Sparsify. +It is advised to create a [fresh virtual environment](https://docs.python.org/3/library/venv.html) to avoid dependency issues. Install with pip using: - ```bash pip install sparsify-nightly ``` -## Step 4: Sparsify Login + +#### 1.4 Login via CLI Next, with Sparsify installed on your training hardware: -1. Authorize the local CLI to access your account by running the `sparsify.login` command and providing your API key. -2. Locate your API key on the homepage of the [Sparsify Cloud](https://apps.neuralmagic.com/sparsify) under the **'Get set up'** modal, and copy the command or the API key itself. +1. Authorize the local CLI to access your account by running the sparsify.login command and providing your API key. +2. 
Locate your API key on the homepage of the [Sparsify Cloud](https://apps.neuralmagic.com/sparsify) under the 'Get set up' modal, and copy the command or the API key itself. 3. Run the following command: ```bash sparsify.login API_KEY -```` - -See the related guides for more details on: -* Locating the API_KEY - [Sparsify Cloud User Guide](https://github.com/neuralmagic/sparsify/blob/main/docs/cloud-user-guide.md). -* Running the `sparsify.login` command - [CLI/API Guide](https://github.com/neuralmagic/sparsify/blob/main/docs/cli-api-guide.md). - -**Note:** Every time you use Sparsify, you will need to log in via the Sparsify CLI so that your local session can sync with your account in the Sparsify Cloud. - -## Step 5: Running Experiments - -In this section, you will learn about Sparsify Experiments and run an Experiment. +``` -### Experiments Overview -Experiments are the core of sparsifying a model. They allow you to apply sparsification algorithms to a dataset and model through the three Experiment types detailed below: One-Shot, Training-Aware, or Sparse-Transfer. All Experiments are run locally on your training hardware and can be synced with the cloud for further analysis and comparison, using Sparsify's two components: +### 2. Run an Experiment -* Sparsify Cloud - explore hyperparameters, predict performance, and generate the desired CLI/API command. - * See the [Sparsify Cloud User Guide](https://github.com/neuralmagic/sparsify/blob/main/docs/cloud-user-guide.md) for more details on generating commands from the Sparsify Cloud. - -* Sparsify CLI/API - run an experiment depending on your use case. +Experiments are the core of sparsifying a model. 
+They allow you to apply sparsification algorithms to a dataset and model through the three Experiment types detailed below: +- [One-Shot](#21-one-shot) +- [Training-Aware](#22-training-aware) +- [Sparse-Transfer](#23-sparse-transfer) -Learn more about the Experiment types and understand which use case might be best for your task. +All Experiments are run locally on your training hardware and can be synced with the cloud for further analysis and comparison, using Sparsify's two components: +- Sparsify Cloud - explore hyperparameters, predict performance, and generate the desired CLI/API command. +- Sparsify CLI/API - run an experiment. -#### One-Shot Experiments +#### 2.1 One-Shot | Sparsity | Sparsification Speed | Accuracy | |----------|----------------------|----------| | **++** | **+++++** | **+++** | -One-Shot Experiments are the quickest way to create a faster and smaller version of your model. -The algorithms are applied to the model post-training utilizing a calibration dataset, so they result in no further training time and much faster sparsification times compared with Training-Aware Experiments. +One-Shot Experiments quickly sparsify your model post-training, providing a 3-5x speedup with minimal accuracy loss, ideal for quick model optimization without retraining your model. -Generally, One-Shot Experiments result in a 3-5x speedup with minimal accuracy loss. -They are ideal for when you want to quickly sparsify your model and don't have a lot of time to spend on the sparsification process. - -#### Sparse-Transfer Experiments - -| Sparsity | Sparsification Speed | Accuracy | -|----------|----------------------|-----------| -| **++++** | **++++** | **+++++** | - -Sparse-Transfer Experiments are the second quickest way to create a faster and smaller model for your dataset. -Sparse, foundational models are sparsified in a Training-Aware manner on a large dataset such as ImageNet. 
-Then, the sparse patterns are transferred to your dataset through a fine-tuning process. - -Generally, Sparse-Transfer Experiments result in a 5-10x speedup with minimal accuracy loss. -They are ideal when a sparse model already exists for your use case, and you want to quickly utilize it for your dataset. -Note, the model argument is optional for Sparse-Transfer Experiments as Sparsify will select the best one from the SparseZoo for your use case if not supplied. - -#### Training-Aware Experiments - -| Sparsity | Sparsification Speed | Accuracy | -|-----------|-----------------------|-----------| -| **+++++** | **++** | **+++++** | - -Training-Aware Experiments are the most accurate way to create a faster and smaller model for your dataset. -The algorithms are applied to the model during training, so they offer the best possible recovery of accuracy. -However, they do require additional training time and hyperparameter tuning to achieve the best results. - -Generally, Training-Aware Experiments result in a 6-12x speedup with minimal accuracy loss. -They are ideal when you have the time to train a model, have a custom model, or want to achieve the best possible accuracy. -Note, the model argument is optional for Sparse-Transfer Experiments, as Sparsify will select the best one from the SparseZoo for your use case if not supplied. +To run a One-Shot Experiment for your model, dataset, and use case, use the following command: +```bash +sparsify.run one-shot --use-case USE_CASE --model MODEL --data DATASET --optim-level OPTIM_LEVEL +``` -### Command Syntax and Arguments -Now that you have learned about Experiments and the various types, you are ready to run an Experiment. 
-Running Experiments uses the following general command: +For example, to sparsify a ResNet50 model on the ImageNet dataset for image classification, run the following commands: +```bash +wget https://public.neuralmagic.com/datasets/cv/classification/imagenet_calibration.tar.gz +tar -xzf imagenet_calibration.tar.gz +sparsify.run one-shot --use-case image_classification --model "zoo:cv/classification/resnet_v1-50/pytorch/sparseml/imagenet/base-none" --data ./imagenet_calibration --optim-level 0.5 +``` +Or, to sparsify a BERT model on the SST-2 dataset for sentiment analysis, run the following commands: ```bash -sparsify.run EXPERIMENT_TYPE --use-case USE_CASE --model MODEL --data DATA --optim-level OPTIM_LEVEL +wget https://public.neuralmagic.com/datasets/nlp/text_classification/sst2_calibration.tar.gz +tar -xzf sst2_calibration.tar.gz +sparsify.run one-shot --use-case text_classification --model "zoo:nlp/sentiment_analysis/bert-base/pytorch/huggingface/sst2/base-none" --data ./sst2_calibration --optim-level 0.5 ``` -The values for each of the arguments follow these general rules: -- **`EXPERIMENT_TYPE`**: one of `one-shot`, `training-aware`, or `sparse-transfer`. -- [[Guide]](https://github.com/neuralmagic/sparsify/blob/main/docs/use-cases-guide.md) **`USE_CASE`**: the use case you're solving for, such as `image-classification`, `object-detection`, `text-classification`, or a custom use case. -- [[Guide]](https://github.com/neuralmagic/sparsify/blob/main/docs/models-guide.md) **`MODEL`**: the model you want to sparsify which can be a model name such as `resnet50`, a stub from the [SparseZoo](https://sparsezoo.neuralmagic.com), or a path to a local model. For One-Shot, currently, the model must be in an ONNX format. For Training-Aware and Sparse-Transfer, the model must be in a PyTorch format. -- [[Guide]](https://github.com/neuralmagic/sparsify/blob/main/docs/datasets-guide.md) **`DATA`**: the dataset you want to use to sparsify the model. 
This can be a dataset name such as `imagenette` or a path to a local dataset. Currently, One-Shot only supports NPZ-formatted datasets. Training-Aware and Sparse-Transfer support PyTorch ImageFolder datasets for image classification, YOLOv5/v8 datasets for object detection and segmentation, and Hugging Face datasets for NLP/NLG. -- [[Guide]](https://github.com/neuralmagic/sparsify/blob/main/docs/optim-levels-guide.md) **`OPTIM_LEVEL`**: the desired sparsification level from 0 (none) to 1 (max). The general rule is that 0 is the baseline model, <0.3 only quantizes the model, 0.3-1.0 increases the sparsity of the model and applies quantization. +To dive deeper into One-Shot Experiments, read through the [One-Shot Experiment Guide](https://github.com/neuralmagic/sparsify/blob/main/docs/one-shot_experiment-guide.md). -### Example Commands by Experiment Type + +Note, One-Shot Experiments currently require the model to be in an ONNX format and the dataset to be in a Numpy format. +More details are provided in the One-Shot Experiment Guide. + -Here are examples you may wish to run; pick your use case and see if you can successfully run your first experiment! -With successful experiments, a `model.onnx` file will be created in your working directory, which will be the optimized model, and you will have no CLI errors. +#### 2.2 Sparse-Transfer -#### Running One-Shot Experiments +| Sparsity | Sparsification Speed | Accuracy | -Computer Vision: -```bash -sparsify.run one-shot --use-case image_classification --model resnet50 --data imagenette --optim-level 0.5 -``` +|----------|----------------------|-----------| +| **++++** | **++++** | **+++++** | +Sparse-Transfer Experiments quickly create a smaller and faster model for your dataset by transferring from a [SparseZoo](https://sparsezoo.neuralmagic.com/) pre-sparsified foundational model, providing a 5-10x speedup with minimal accuracy loss, ideal for quick model optimization without retraining your model. 
-NLP: +To run a Sparse-Transfer Experiment for your model (optional), dataset, and use case, run the following command: ```bash -sparsify.run one-shot --use-case text_classification --model bert-base --data sst2 --optim-level 0.5 +sparsify.run sparse-transfer --use-case USE_CASE --model OPTIONAL_MODEL --data DATASET --optim-level OPTIM_LEVEL ``` -#### Running Sparse-Transfer Experiments - -Computer Vision: +For example, to sparse transfer a SparseZoo model to the ImageNette dataset for image classification, run the following command: ```bash sparsify.run sparse-transfer --use-case image_classification --data imagenette --optim-level 0.5 ``` -NLP: +Or, to sparse transfer a SparseZoo model to the SST-2 dataset for sentiment analysis, run the following command: ```bash sparsify.run sparse-transfer --use-case text_classification --data sst2 --optim-level 0.5 ``` -#### Running Training-Aware Experiments +To dive deeper into Sparse-Transfer Experiments, read through the [Sparse-Transfer Experiment Guide](https://github.com/neuralmagic/sparsify/blob/main/docs/sparse-transfer_experiment-guide.md). + + +Note, Sparse-Transfer Experiments require the model to be saved in a PyTorch format corresponding to the underlying integration such as Ultralytics YOLOv5 or HuggingFace Transformers. +Datasets must additionally match the expected format of the underlying integration. +More details and exact formats are provided in the Sparse-Transfer Experiment Guide. + -Computer Vision: +#### 2.3 Training-Aware + +| Sparsity | Sparsification Speed | Accuracy | +|-----------|-----------------------|-----------| +| **+++++** | **++** | **+++++** | + +Training-Aware Experiments sparsify your model during training, providing a 6-12x speedup with minimal accuracy loss, ideal for thorough model optimization when the best performance and accuracy are required. 
+ +To run a Training-Aware Experiment for your model, dataset, and use case, run the following command: ```bash -sparsify.run training-aware --use-case image_classification --model resnet50 --data imagenette --optim-level 0.5 +sparsify.run training-aware --use-case USE_CASE --model OPTIONAL_MODEL --data DATASET --optim-level OPTIM_LEVEL ``` -NLP: +For example, to sparsify a ResNet50 model on the ImageNette dataset for image classification, run the following command: ```bash -sparsify.run training-aware --use-case text_classification --model bert-base --data sst2 --optim-level 0.5 +sparsify.run training-aware --use-case image_classification --model "zoo:cv/classification/resnet_v1-50/pytorch/sparseml/imagenette/base-none" --data imagenette --optim-level 0.5 ``` -## Step 6: Comparing Experiment Results - -Once you have run your Experiment, compare the results printed out to the console using the `deepsparse.benchmark` command. -In the near future, you will be able to compare the results in the Cloud, measure other scenarios, and compare the results to other Experiments. - +Or, to sparsify a BERT model on the SST-2 dataset for sentiment analysis, run the following command: +```bash +sparsify.run training-aware --use-case text_classification --model "zoo:nlp/sentiment_analysis/bert-base/pytorch/huggingface/sst2/base-none" --data sst2 --optim-level 0.5 +``` -To compare the results of your Experiment with the original dense baseline model, use the `deepsparse.benchmark` command with your original model and the new optimized model on your deployment hardware. Models that have been optimized using Sparsify will generally run performantly on DeepSparse, Neural Magic's sparsity-aware CPU inference runtime. +To dive deeper into Training-Aware Experiments, read through the [Training-Aware Experiment Guide](https://github.com/neuralmagic/sparsify/blob/main/docs/training-aware_experiment-guide.md). 
+ + +Note, Training-Aware Experiments require the model to be saved in a PyTorch format corresponding to the underlying integration such as Ultralytics YOLOv5 or HuggingFace Transformers. +Datasets must additionally match the expected format of the underlying integration. +More details and exact formats are provided in the Training-Aware Experiment Guide. + + +### 3. Compare Results + +Once you have run your Experiment, the results, logs, and deployment files will be saved under the current working directory in the following format: +```text +[EXPERIMENT_TYPE]_[USE_CASE]_{DATE_TIME} +├── deployment +│ ├── model.onnx +│ └── *supporting files* +├── logs +│ ├── *logs* +├── training_artifacts +│ ├── *training artifacts* + ├── *metrics and results* +``` +You can compare the accuracy by looking through the metrics printed out to the console and the metrics saved in the experiment directory. +Additionally, you can use [DeepSparse](https://github.com/neuralmagic/deepsparse) to compare the inference performance on your CPU deployment hardware. -See the [DeepSparse Benchmarking User Guide](https://github.com/neuralmagic/deepsparse/blob/main/docs/user-guide/deepsparse-benchmarking.md) for more information on benchmarking. + +Note: In the near future, you will be able to visualize the results in the Cloud, simulate other scenarios and hyperparameters, compare the results to other Experiments, and package for your deployment scenario. + -Here is an example of a `deepsparse.benchmark`command: +To run a benchmark on your deployment hardware, use the `deepsparse.benchmark` command with your original model and the new optimized model. +This will run a number of inferences to simulate a real-world scenario and print out the results. 
+It's as simple as running the following command: +```bash +deepsparse.benchmark --model MODEL --scenario SCENARIO ``` -deepsparse.benchmark zoo:nlp/sentiment_analysis/obert-base/pytorch/huggingface/sst2/pruned90_quant-none --scenario sync +For example, to benchmark a dense ResNet-50 model, run the following command: +```bash +deepsparse.benchmark --model "zoo:cv/classification/resnet_v1-50/pytorch/sparseml/imagenette/base-none" --scenario sync ``` -The results will look something like this: +This can then be compared to the sparsified ResNet-50 model with the following command: ```bash -2023-06-30 15:20:41 deepsparse.benchmark.benchmark_model INFO Thread pinning to cores enabled -downloading...: 100%|████████████████████████| 105M/105M [00:18<00:00, 5.81MB/s] +deepsparse.benchmark --model "zoo:cv/classification/resnet_v1-50/pytorch/sparseml/imagenet/pruned95_quant-none" --scenario sync +``` + +The output will look similar to the following: +```text DeepSparse, Copyright 2021-present / Neuralmagic, Inc. version: 1.6.0.20230629 COMMUNITY | (fc8b788a) (release) (optimized) (system=avx512, binary=avx512) -[7ffba5a84700 >WARN< operator() ./src/include/wand/utility/warnings.hpp:14] Generating emulated code for quantized (INT8) operations since no VNNI instructions were detected. Set NM_FAST_VNNI_EMULATION=1 to increase performance at the expense of accuracy. 
-2023-06-30 15:21:13 deepsparse.benchmark.benchmark_model INFO deepsparse.engine.Engine: - onnx_file_path: /home/rahul/.cache/sparsezoo/neuralmagic/obert-base-sst2_wikipedia_bookcorpus-pruned90_quantized/model.onnx +deepsparse.benchmark.benchmark_model INFO deepsparse.engine.Engine: + onnx_file_path: ./model.onnx batch_size: 1 - num_cores: 10 + num_cores: 1 num_streams: 1 scheduler: Scheduler.default fraction_of_supported_ops: 0.9981 cpu_avx_type: avx512 cpu_vnni: False -2023-06-30 15:21:13 deepsparse.utils.onnx INFO Generating input 'input_ids', type = int64, shape = [1, 128] -2023-06-30 15:21:13 deepsparse.utils.onnx INFO Generating input 'attention_mask', type = int64, shape = [1, 128] -2023-06-30 15:21:13 deepsparse.utils.onnx INFO Generating input 'token_type_ids', type = int64, shape = [1, 128] -2023-06-30 15:21:13 deepsparse.benchmark.benchmark_model INFO Starting 'singlestream' performance measurements for 10 seconds -Original Model Path: zoo:nlp/sentiment_analysis/obert-base/pytorch/huggingface/sst2/pruned90_quant-none +=Original Model Path: ./model.onnx Batch Size: 1 Scenario: sync Throughput (items/sec): 134.5611 @@ -328,34 +311,63 @@ Latency Std (ms/batch): 0.0264 Iterations: 1346 ``` -*Note: performance improvement is not guaranteed across all runtimes and hardware types.* +See the [DeepSparse Benchmarking User Guide](https://github.com/neuralmagic/deepsparse/blob/main/docs/user-guide/deepsparse-benchmarking.md) for more information on benchmarking. -## Step 7: Deploying Your Model With DeepSparse +### 4. Deploy a Model -As an optional step to this quick start, now that you have your optimized model, you are ready for inferencing. To get the most inference performance out of your optimized model, we recommend you deploy on Neural Magic's [DeepSparse](https://docs.neuralmagic.com/deepsparse). DeepSparse is built to get the best performance out of optimized models on CPUs. 
+As an optional step to this quickstart, now that you have your optimized model, you are ready for inferencing. +To get the most inference performance out of your optimized model, we recommend you deploy on Neural Magic's [DeepSparse](https://docs.neuralmagic.com/deepsparse). +DeepSparse is built to get the best performance out of optimized models on CPUs. DeepSparse Server takes in a task and a model path and will enable you to serve models and `Pipelines` for deployment in HTTP. -You can deploy any ONNX model using DeepSparse Server by running: +You can deploy any ONNX model using DeepSparse Server with the following command: ```bash -deepsparse.server \ -task question_answering \ ---model_path "zoo:nlp/question_answering/bert-base/pytorch/huggingface/squad/12layer_pruned80_quant-none-vnni"` +deepsparse.server --task USE_CASE --model_path MODEL_PATH ``` -To run inference on your own model, change the model path to the location of your `model.onnx` file. Consult the [DeepSparse Server Docs](https://docs.neuralmagic.com/user-guides/deploying-deepsparse/deepsparse-server) for more details. +Where `USE_CASE` is the use case of your Experiment and `MODEL_PATH` is the path to the deployment folder from the Experiment. + +For example, to deploy a sparsified ResNet-50 model, run the following command: +```bash +deepsparse.server --task image_classification --model_path "zoo:cv/classification/resnet_v1-50/pytorch/sparseml/imagenet/pruned95_quant-none" +``` + +If you're not ready for deploying, congratulations on completing the quickstart! 
+ +## Companion Guides + +- [Sparsify Cloud User Guide](https://github.com/neuralmagic/sparsify/blob/main/docs/cloud-user-guide.md) +- [Sparsify Use Cases Guide](https://github.com/neuralmagic/sparsify/blob/main/docs/use-cases-guide.md) +- [Sparsify Models Guide](https://github.com/neuralmagic/sparsify/blob/main/docs/models-guide.md) +- [Sparsify Datasets Guide](https://github.com/neuralmagic/sparsify/blob/main/docs/datasets-guide.md) + +## Resources + +Now that you have explored Sparsify [Alpha], here are other related resources. + +### Feedback and Support + +Report UI issues and CLI errors, submit bug reports, and provide general feedback about the product to the team via the [nm-sparsify Slack Channel](https://join.slack.com/t/discuss-neuralmagic/shared_invite/zt-1xkdlzwv9-2rvS6yQcCs7VDNUcWxctnw), or via [GitHub Issues](https://github.com/neuralmagic/sparsify/issues). Alpha support is provided through those channels. + +### Terms and Conditions -If you're not ready for deploying, congratulations on completing the quick start! We welcome your [Sparsify Alpha feedback and support issues](#feedback-and-support) as described at the beginning of this guide. +Sparsify Alpha is a pre-release version of Sparsify that is still in active development. +The product is not yet ready for production use; APIs and UIs are subject to change. +There may be bugs in the Alpha version, which we hope to have fixed before Beta and then a general Q3 2023 release. +The feedback you provide on quality and usability helps us identify issues, fix them, and make Sparsify even better. +This information is used internally by Neural Magic solely for that purpose. +It is not shared or used in any other way. -# Resources -Now that you have explored the Sparsify Alpha Quick Start, here are other related resources. +That being said, we are excited to share this release and hear what you think. +Thank you in advance for your feedback and interest! 
-## Learning More +### Learning More - Documentation: [SparseML,](https://docs.neuralmagic.com/sparseml/) [SparseZoo,](https://docs.neuralmagic.com/sparsezoo/) [Sparsify (1st Generation),](https://docs.neuralmagic.com/sparsify/) [DeepSparse](https://docs.neuralmagic.com/deepsparse/) - Neural Magic: [Blog,](https://www.neuralmagic.com/blog/) [Resources](https://www.neuralmagic.com/resources/) -## Release History +### Release History Official builds are hosted on PyPI @@ -364,7 +376,7 @@ Official builds are hosted on PyPI Additionally, more information can be found via [GitHub Releases.](https://github.com/neuralmagic/sparsify/releases) -## License +### License The project is licensed under the [Apache License Version 2.0](https://github.com/neuralmagic/sparsify/blob/main/LICENSE). @@ -384,7 +396,8 @@ For more general questions about Neural Magic, please fill out this [form.](http ### Cite -Find this project useful in your research or other communications? Please consider citing: +Find this project useful in your research or other communications? +Please consider citing: ```bibtex @InProceedings{ diff --git a/docs/cli-api-guide.md b/docs/cli-api-guide.md deleted file mode 100644 index 41d12621..00000000 --- a/docs/cli-api-guide.md +++ /dev/null @@ -1,198 +0,0 @@ - - - -# Sparsify CLI/API Guide - -The Sparsify CLI/API is a Python package that allows you to run Sparsify Experiments locally, sync with the Sparsify Cloud, and integrate into your own workflows. - -## Installing Sparsify - -Next, install Sparsify on your training hardware by running the following command: - -```bash -pip install sparsify-nightly -``` - -For more details and system/hardware requirements, see the [Installation](https://github.com/neuralmagic/sparsify#installation) section. - -## Logging in to Sparsify - -With Sparsify installed on your training hardware, you will need to authorize the local CLI to access your account. 
-This is done by running the `sparsify.login` command and providing your API key. -Locate your API key on the home page of the [Sparsify Cloud](https://apps.neuralmagic.com/sparsify) under the **'Get set up'** modal. -Once you have located this, copy the command or the API key itself and run the following command: - -```bash -sparsify.login API_KEY -```` - -The `sparsify.login API_KEY` command is used to sync your local training environment with the Sparsify Cloud in order to keep track of your Experiments. Once you run the `sparsify.login API_KEY` command, you should see a confirmation via the console that you are logged in to Sparsify. To log out of Sparsify, use the `exit` command. - -If you encounter any issues with your API key, reach out to the team via the [nm-sparsify Slack Channel](https://join.slack.com/t/discuss-neuralmagic/shared_invite/zt-1xkdlzwv9-2rvS6yQcCs7VDNUcWxctnw), [email](mailto::rob@neuralmagic.com) or via [GitHub Issues](https://github.com/neuralmagic/sparsify/issues). - - -## Running an Experiment - -Experiments are the core of sparsifying a model. -They are the process of applying sparsification algorithms in One-Shot, Training-Aware, or Sparse-Transfer to a dataset and model. - -All Experiments are run locally on your training hardware and can be synced with Sparsify Cloud for further analysis and comparison. - -To run an Experiment, you can use either the CLI or the API, depending on your use case. -The Sparsify Cloud provides a UI for exploring hyperparameters, predicting performance, and generating the desired CLI/API command. - -The general command for running an Experiment is: - -```bash -sparsify.run EXPERIMENT_TYPE --use-case USE_CASE --model MODEL --data DATA --optim-level OPTIM_LEVEL -``` - -Where the values for each of the arguments follow these general rules: -- EXPERIMENT_TYPE: one of `one-shot`, `training-aware`, or `sparse-transfer`. 
- -- USE_CASE: the use case you're solving for, such as `image-classification`, `object-detection`, `text-classification`, a custom use case, etc. A full list of supported use cases for each Experiment type can be found [here](https://github.com/neuralmagic/sparsify/blob/main/docs/use-cases-guide.md). - -- MODEL: the model you want to sparsify which can be a model name such as `resnet50`, a stub from the [SparseZoo](https://sparsezoo.neuralmagic.com), or a path to a local model. For One-Shot, currently, the model must be in an ONNX format. For Training-Aware and Sparse-Transfer, the model must be in a PyTorch format. More details on model formats can be found [here](https://github.com/neuralmagic/sparsify/blob/main/docs/models-guide.md). - -- DATA: the dataset you want to use to sparsify the model. This can be a dataset name such as `imagenette` or a path to a local dataset. Currently, One-Shot only supports NPZ formatted datasets. Training-Aware and Sparse-Transfer support PyTorch ImageFolder datasets for image classification, YOLOv5/v8 datasets for object detection and segmentation, and Hugging Face datasets for NLP/NLG. More details on dataset formats can be found [here](https://github.com/neuralmagic/sparsify/blob/main/docs/datasets-guide.md). - -- OPTIM_LEVEL: the desired sparsification level from 0 (none) to 1 (max). The general rule is that 0 is the baseline model, <0.3 only quantizes the model, 0.3-1.0 increases the sparsity of the model and applies quantization. More details on sparsification levels can be found [here](https://github.com/neuralmagic/sparsify/blob/main/docs/optim-levels-guide.md). - - -### Experiment Type Examples -#### Running One-Shot - -| Sparsity | Sparsification Speed | Accuracy | -|----------|----------------------|----------| -| **++** | **+++++** | **+++** | - -One-Shot Experiments are the quickest way to create a faster and smaller version of your model. 
-The algorithms are applied to the model post-training, utilizing a calibration dataset, so they result in no further training time and much faster sparsification times compared with Training-Aware Experiments. - -Generally, One-Shot Experiments result in a 3-5x speedup with minimal accuracy loss. -They are ideal for when you want to quickly sparsify your model and have limited time to spend on the sparsification process. - -CV Example: -```bash -sparsify.run one-shot --use-case image_classification --model resnet50 --data imagenette --optim-level 0.5 -``` - -NLP Example: -```bash -sparsify.run one-shot --use-case text_classification --model bert-base --data sst2 --optim-level 0.5 -``` - -#### Running Sparse-Transfer - -| Sparsity | Sparsification Speed | Accuracy | -|----------|----------------------|-----------| -| **++++** | **++++** | **+++++** | - -Sparse-Transfer Experiments are the second quickest way to create a faster and smaller model for your dataset. -Sparse, foundational models are sparsified in a Training-Aware manner on a large dataset such as ImageNet. -Then, the sparse patterns are transferred to your dataset through a fine-tuning process. - -Generally, Sparse-Transfer Experiments result in a 5-10x speedup with minimal accuracy loss. -They are ideal when a sparse model already exists for your use case, and you want to quickly utilize it for your dataset. -Note, the model argument is optional for Sparse-Transfer Experiments as Sparsify will select the best one from the SparseZoo for your use case if not supplied. 
- -CV Example: -```bash -sparsify.run sparse-transfer --use-case image_classification --data imagenette --optim-level 0.5 -``` - -NLP Example: -```bash -sparsify.run sparse-transfer --use-case text_classification --data sst2 --optim-level 0.5 -``` - -#### Running Training-Aware - -| Sparsity | Sparsification Speed | Accuracy | -|-----------|-----------------------|-----------| -| **+++++** | **++** | **+++++** | - -Training-Aware Experiments are the most accurate way to create a faster and smaller model for your dataset. -The algorithms are applied to the model during training, so they offer the best possible recovery of accuracy. -However, they do require additional training time and hyperparameter tuning to achieve the best results. - -Generally, Training-Aware Experiments result in a 6-12x speedup with minimal accuracy loss. -They are ideal when you have the time to train a model, have a custom model, or want to achieve the best possible accuracy. -Note, the model argument is optional for Sparse-Transfer Experiments as Sparsify will select the best one from the SparseZoo for your use case if not supplied. - -CV Example: -```bash -sparsify.run training-aware --use-case image_classification --model resnet50 --data imagenette --optim-level 0.5 -``` - -NLP Example: -```bash -sparsify.run training-aware --use-case text_classification --model bert-base --data sst2 --optim-level 0.5 -``` - -## Advanced CLI/API Usage -Landing Soon! - - -## Comparing the Experiment results - -Once you have run your Experiment, you can compare the results printed out to the console using the `deepsparse.benchmark` command. -In the near future, you will be able to compare the results in the Cloud, measure other scenarios, and compare the results to other Experiments. - - -To compare the results of your Experiment with the original dense baseline model, you can use the `deepsparse.benchmark` command with your original model and the new optimized model on your deployment hardware. 
Models that have been optimized using Sparsify will generally run performantly on DeepSparse, Neural Magic's sparsity-aware CPU inference runtime. - - -For more information on benchmarking, see the [DeepSparse Benchmarking User Guide](https://github.com/neuralmagic/deepsparse/blob/main/docs/user-guide/deepsparse-benchmarking.md). - -Here is an example of a `deepsparse.benchmark`command: - -``` -deepsparse.benchmark zoo:nlp/sentiment_analysis/obert-base/pytorch/huggingface/sst2/pruned90_quant-none --scenario sync - -``` - -The results will look something like this: -```bash -2023-06-30 15:20:41 deepsparse.benchmark.benchmark_model INFO Thread pinning to cores enabled -downloading...: 100%|████████████████████████| 105M/105M [00:18<00:00, 5.81MB/s] -DeepSparse, Copyright 2021-present / Neuralmagic, Inc. version: 1.6.0.20230629 COMMUNITY | (fc8b788a) (release) (optimized) (system=avx512, binary=avx512) -[7ffba5a84700 >WARN< operator() ./src/include/wand/utility/warnings.hpp:14] Generating emulated code for quantized (INT8) operations since no VNNI instructions were detected. Set NM_FAST_VNNI_EMULATION=1 to increase performance at the expense of accuracy. 
-2023-06-30 15:21:13 deepsparse.benchmark.benchmark_model INFO deepsparse.engine.Engine: - onnx_file_path: /home/rahul/.cache/sparsezoo/neuralmagic/obert-base-sst2_wikipedia_bookcorpus-pruned90_quantized/model.onnx - batch_size: 1 - num_cores: 10 - num_streams: 1 - scheduler: Scheduler.default - fraction_of_supported_ops: 0.9981 - cpu_avx_type: avx512 - cpu_vnni: False -2023-06-30 15:21:13 deepsparse.utils.onnx INFO Generating input 'input_ids', type = int64, shape = [1, 128] -2023-06-30 15:21:13 deepsparse.utils.onnx INFO Generating input 'attention_mask', type = int64, shape = [1, 128] -2023-06-30 15:21:13 deepsparse.utils.onnx INFO Generating input 'token_type_ids', type = int64, shape = [1, 128] -2023-06-30 15:21:13 deepsparse.benchmark.benchmark_model INFO Starting 'singlestream' performance measurements for 10 seconds -Original Model Path: zoo:nlp/sentiment_analysis/obert-base/pytorch/huggingface/sst2/pruned90_quant-none -Batch Size: 1 -Scenario: sync -Throughput (items/sec): 134.5611 -Latency Mean (ms/batch): 7.4217 -Latency Median (ms/batch): 7.4245 -Latency Std (ms/batch): 0.0264 -Iterations: 1346 -``` - -*Note: performance improvement is not guaranteed across all runtimes and hardware types.* diff --git a/docs/cloud-user-guide.md b/docs/cloud-user-guide.md index 3ba275c8..72e44f70 100644 --- a/docs/cloud-user-guide.md +++ b/docs/cloud-user-guide.md @@ -82,7 +82,6 @@ To run an Experiment, use the Sparsify Cloud to generate a code command to run i 5. Adjust the Hyperparameter Compression slider to designate whether you would like to optimize the model for performance, accuracy, or a balance of both. Note that selecting an extreme on the slider will not completely tank the opposing metric. 6. Click 'Generate Code Snippet' to view the code snippet generated from your sparsification selections on the next modal. ![Generate Code Snippetl](https://drive.google.com/uc?id=14B193hHeYqLeSX8r6C5N1G8beBeXUkYE) - 7. 
Once your code snippet is generated, make sure you have installed Sparsify and are logged in via the CLI. 8. Copy the code snippet and fill in the paths to your local dense model and/or training dataset as prompted. 9. Run the command and wait for your sparse model to complete. You have now completed running an Experiment with Sparsify. diff --git a/docs/one-shot_experiment-guide.md b/docs/one-shot_experiment-guide.md new file mode 100644 index 00000000..9a60a022 --- /dev/null +++ b/docs/one-shot_experiment-guide.md @@ -0,0 +1,251 @@ + + + + +# Sparsify One-Shot Experiment Guide + +If you're just getting started with Sparsify, we recommend you try out this One-Shot Experiment pathway first. We also have Sparse-Transfer and Training-Aware Experiments, which you can explore in the [Next Steps](#next-steps) section of this guide. + +## Overview +1. One-Shot Experiment Overview +2. One-Shot CLI Quickstart +3. One-Shot Cloud Quickstart +4. Next Steps +5. Resources + + +### One-Shot Experiment Overview + +| Sparsity | Sparsification Speed | Accuracy | +|----------|----------------------|----------| +| **++** | **+++++** | **+++** | + +One-Shot Experiments are the quickest way to create a faster and smaller version of your model. +The algorithms are applied to the model post-training, utilizing a calibration dataset, so they result in no further training time and much faster sparsification times compared with Training-Aware Experiments. + +Generally, One-Shot Experiments result in a 3-5x speedup with minimal accuracy loss. +They are ideal for when you want to quickly sparsify your model and have limited time to spend on the sparsification process. + + +### One-Shot CLI Quickstart + +Now that you understand what a One-Shot Experiment is and the benefits, including short optimization time due to post-training algorithms, you can now use the CLI to effectively run a One-Shot Experiment. + +Before you run a One-Shot Experiment, confirm you are logged into the Sparsify CLI. 
For installation and setup instructions, review the [Sparsify Install and Setup Section](https://github.com/neuralmagic/sparsify#installation) in the Sparsify README.
+
+One-Shot Experiments use the following general command:
+
+```bash
+sparsify.run one-shot --use-case USE_CASE --model MODEL --data DATA --optim-level OPTIM_LEVEL
+```
+
+The values for each of the arguments follow these general rules:
+- [**`USE_CASE`** ](#use_case)
+- [**`MODEL`**](#model)
+- [**`DATA`**](#data)
+- [**`OPTIM_LEVEL`**](#optim_level)
+
+#### USE_CASE
+
+The generally supported use cases for Sparsify are:
+
+- CV - classification: `cv-classification`
+- CV - detection: `cv-detection`
+- CV - segmentation: `cv-segmentation`
+- NLP - question answering: `nlp-question_answering`
+- NLP - text classification: `nlp-text_classification`
+- NLP - sentiment analysis: `nlp-sentiment_analysis`
+- NLP - token classification: `nlp-token_classification`
+- NLP - named entity recognition: `nlp-named_entity_recognition`
+
+Note that other aliases are recognized for these use cases, such as image-classification for cv-classification. Sparsify will automatically recognize these aliases and apply the correct use case.
+
+For One-Shot Experiments, both the CLIs and APIs always support custom use cases. To utilize, run a One-Shot Experiment with `--use-case` set to the desired custom use case. This custom use case can be any string as long as it contains only ASCII characters.
+
+For full details on Sparsify use cases, read the [Sparsify Use Cases Guide](https://github.com/neuralmagic/sparsify/blob/main/docs/use-cases-guide.md).
+
+#### MODEL
+
+One-Shot requires the model provided to be in an [ONNX format](https://onnx.ai/). For guidance on how to convert a PyTorch model to ONNX, read our [ONNX Export User Guide](https://docs.neuralmagic.com/user-guides/onnx-export).
+
+In the near future, more formats including PyTorch will be added for support with One-Shot Experiments.
+ +#### DATA + +For One-Shot Experiments, Sparsify utilizes the `.npz` format for data storage, which is a file format based on the popular NumPy library. This format is efficient and versatile. + +##### Dataset Specifications + +- Each `.npz` file should contain a single data sample, with no batch dimension. This data sample will be run through the ONNX model. +- The `.npz` file should be structured as a dictionary, mapping the input name in the ONNX specification to a numpy array containing the data. +- All data samples should be stored under the same directory, typically named `data`. + +The local file structure should look like the following: + +```text +data + -- input1.npz + -- input2.npz + -- input3.npz +``` + +##### Example + +For example, if you have a BERT-style model with a sequence length of 128, each `.npz` file should contain a dictionary mapping input names ("input_ids", "attention_mask", "token_type_ids") to numpy arrays of the appropriate size: + +```text +{ + "input_ids": ndarray(128,), + "attention_mask": ndarray(128,), + "token_type_ids": ndarray(128,) +} +``` + +The dictionary keys should match the names of the inputs in the ONNX model specification, and the shapes of the arrays should match the expected input shapes of the model. 
+
+##### Generating NPZ Files
+
+Below is an example script for generating this file structure from a PyTorch module **before the ONNX export**:
+
+```python
+import numpy as np
+import torch
+from torch import Tensor
+
+class NumpyExportWrapper(torch.nn.Module):
+    def __init__(self, model):
+        super(NumpyExportWrapper, self).__init__()
+        self.model = model
+        self.model.eval()  # Set model to evaluation mode
+        self.numpy_data = []
+
+    def forward(self, *args, **kwargs):
+        with torch.no_grad():
+            inputs = {}
+            batch_size = 0
+
+            for index, arg in enumerate(args):
+                if isinstance(arg, Tensor):
+                    inputs[f"input_{index}"] = arg
+                    batch_size = arg.shape[0]
+
+            for key, val in kwargs.items():
+                if isinstance(val, Tensor):
+                    inputs[key] = val
+                    batch_size = val.shape[0]
+
+            start_index = len(self.numpy_data)
+            for _ in range(batch_size):
+                self.numpy_data.append({})
+
+            for input_key in iter(inputs):
+                for idx, input in enumerate(inputs[input_key]):
+                    self.numpy_data[start_index+idx][input_key] = input
+
+        return self.model(*args, **kwargs)
+
+    def save(self, path: str = "data"):
+        for index, item in enumerate(self.numpy_data):
+            npz_file_path = f'{path}/input{str(index).zfill(4)}.npz'
+            np.savez(npz_file_path, **item)
+
+        print(f'Saved {len(self.numpy_data)} npz files to {path}')
+
+model = NumpyExportWrapper(YOUR_MODEL)
+for data in YOUR_DATA_LOADER:
+    model(data[0])
+model.save()
+```
+
+Note: Replace `YOUR_MODEL` and `YOUR_DATA_LOADER` with your PyTorch model and data loader, respectively.
+
+For full details on Sparsify datasets, read the [Sparsify Datasets Guide](https://github.com/neuralmagic/sparsify/blob/main/docs/datasets-guide.md#sparsify-datasets-guide).
+
+#### OPTIM_LEVEL
+
+When using Sparsify, the optim (sparsification) level is one of the top arguments you should decide on. Specifically, it controls how much sparsification is applied to your model, with higher values resulting in faster and more compressed models.
At the max range, though, you may see a drop in accuracy. + +The optim level can be set anywhere from 0.0 to 1.0, where 0.0 is for no sparsification and 1.0 is for maximum sparsification. +0.5 is the default optim level and is a good starting point for most use cases. + +##### One-Shot Optim Levels + +Given that One-Shot is applied in post-training, the sparsity ranges are lowered to avoid accuracy drops as compared with Sparse-Transfer or Training-Aware. +The specific ranges are the following: + +- optim-level == 0.0: no sparsification is applied and the input model is returned as a baseline test case. +- optim-level < 0.3: INT8 quantization of the model (activations and weights) is applied. +- optim-level >= 0.3: unstructured pruning (sparsity) is applied to the weights of the model from 40% for 0.3 to 80% for 1.0 with linear scaling between. + Additionally, INT8 quantization of the model is applied. + +The default of 0.5 will result in a ~50% sparse model with INT8 quantization. + + +### Example One-Shot Experiment CLI Commands + +Here are code examples of One-Shot Experiments you may wish to run; pick your use case and start sparsifying with One-Shot! + +#### Running One-Shot Experiments + +##### Computer Vision Use Case: + +You have an image classification use case and want to run a One-Shot Experiment on a dense ResNet-50 model using the imagenette dataset. You want to quickly and cheaply generate a sparse model so that you can build a prototype of the ResNet-50 model inferencing on a CPU server in the cloud with DeepSparse. Getting a working model that meets your deployment requirements on the imagenette dataset will give you the confidence to continue on your initiative knowing you can hit the metrics required for the business. 
+ +You are targeting a balanced model in terms of wanting to get a 3-5x performance boost in latency while also maintaining the high accuracy of the model so that you can confidently deploy the model in production to solve your business case. + +You can use a Sparsify One-Shot Experiment to try and reach your goal. You have a standard ResNet-50 model as your dense baseline on imagenette which Sparsify already has as an alias model and npz formatted dataset hosted for you to use out of the box. Since you want to very quickly achieve a 3-5x speedup in latency performance with minimal training costs, a One-Shot Experiment makes the most sense for you for its fast optimization and lower, moderately performant sparsity profile. + +With all of these considerations in mind, run the following One-Shot Experiment command to achieve this use case goal: +```bash +sparsify.run one-shot --use-case image_classification --model resnet50 --data imagenette --optim-level 0.5 +``` +The output is as follows: + +MARK + +##### NLP Use Case: +You are working on a text classification use case to help classify text reviews received from your customers through your e-commerce website. You have been having slow inference times using the BERT-base model and want to improve the performance to save costs. + +You want to quickly and cheaply generate a sparse BERT-base model so that you can use it to classify our customer reviews at a lower cost due to the improved performance and speed of the model. You are focused on improving the throughput of the model to process more requests, faster. + +You are targeting a balanced model in terms of wanting to get a 3-5x performance boost in throughput while having a high accuracy so your classifications are actionable. + +You can use a Sparsify One-Shot Experiment to try and reach your goal. 
You have a standard BERT-base model as your dense baseline on the SST2 dataset which Sparsify already has as an alias model and npz formatted dataset hosted for you to use out of the box. You want to try and reduce your costs by improving the throughput performance of your model and you are limited by your compute spend and team size. A One-Shot Experiment makes the most sense for you for its fast optimization and lower cost pathway as opposed to fully retraining the model to optimize it.
+
+With all of these considerations in mind, run the following One-Shot Experiment command to achieve your use case goal:
+
+```bash
+sparsify.run one-shot --use-case text_classification --model bert-base --data sst2 --optim-level 0.5
+```
+The output is as follows:
+MARK
+
+
+### One-Shot Cloud Quickstart
+
+In addition to manually creating commands, you can use the Sparsify Cloud to generate Sparsify One-Shot Experiment commands.
+
+To get started, read the [Sparsify Cloud User Guide](https://github.com/neuralmagic/sparsify/blob/main/docs/cloud-user-guide.md).
+
+
+### Next Steps
+
+Now that you have successfully run a One-Shot Experiment, check out the [Sparse-Transfer](https://github.com/neuralmagic/sparsify/blob/main/docs/sparse-transfer-experiment-guide.md) and [Training-Aware](https://github.com/neuralmagic/sparsify/blob/main/docs/training-aware_experiment-guide.md) Experiments to target different sparsity profiles.
+
+
+### Resources
+To learn more about Sparsify and all of the available pathways outside of One-Shot Experiments, refer to the [Sparsify README](https://github.com/neuralmagic/sparsify).
diff --git a/docs/optim-levels-guide.md b/docs/optim-levels-guide.md
deleted file mode 100644
index 5f1fac5e..00000000
--- a/docs/optim-levels-guide.md
+++ /dev/null
@@ -1,68 +0,0 @@
-
-
-# Sparsify Optim (Sparsification) Levels Guide
-
-When using Sparsify, the optim (sparsification) level is one of the top arguments you should decide on.
-Specifically, it controls how much sparsification is applied to your model, with higher values resulting in faster and more compressed models.
-At the max range, though, you may see a drop in accuracy. -The optim level can be set anywhere from 0.0 to 1.0, where 0.0 is for no sparsification and 1.0 is for maximum sparsification. -0.5 is the default optim level and is a good starting point for most use cases. - -## Optim Level Values - -The general rule is that 0.0 is the baseline model, <0.3 only quantizes the model, and 0.3-1.0 increases the sparsity (unstructured/structured pruning) of the model and applies quantization. -The exact mappings of optim levels depend on the experiment type. -The current mappings for each experiment type are listed below. -Note, these mappings are subject to change in future releases as we continue to improve Sparsify with new algorithms and capabilities. - -### One-Shot Optim Levels - -Given that One-Shot is applied in post-training, the sparsity ranges are lowered to avoid accuracy drops as compared with Sparse-Transfer or Training-Aware. -The specific ranges are the following: - -- optim-level == 0.0: no sparsification is applied and the input model is returned as a baseline test case. -- optim-level < 0.3: INT8 quantization of the model (activations and weights) is applied. -- optim-level >= 0.3: unstructured pruning (sparsity) is applied to the weights of the model from 40% for 0.3 to 80% for 1.0 with linear scaling between. - Additionally, INT8 quantization of the model is applied. - -The default of 0.5 will result in a ~50% sparse model with INT8 quantization. - -### Sparse-Transfer Optim Levels - -Sparse-Transfer mappings are a bit different from One-Shot and Training-Aware since it maps to models available in the SparseZoo to transfer from. -Increasing the optim level will result in smaller and more compressed models. -The specific mappings are the following: - -- optim-level == 0.0: the largest model selected from the SparseZoo with no optimizations. 
-- optim-level < 0.25: the largest model selected from the SparseZoo with INT8 quantization applied to the model (activations and weights). -- optim-level < 0.5: the largest model selected from the SparseZoo with both unstructured pruning (sparsity) and INT8 quantization applied to the model. -- optim-level < 0.75: the medium model selected from the SparseZoo with both unstructured pruning (sparsity) and INT8 quantization applied to the model. -- optim-level <= 1.0: the smallest model selected from the SparseZoo with both unstructured pruning (sparsity) and INT8 quantization applied to the model. - -The default of 0.5 will result in a medium-sized sparse model with INT8 quantization. - -### Training-Aware Optim Levels - -Given that Training-Aware is applied while training, the sparsity ranges are increased as compared to One-Shot since accuracy recovery is easier at higher sparsities. -The specific ranges are the following: - -- optim-level == 0.0: no sparsification is applied and the input model is returned as a baseline test case. -- optim-level < 0.3: INT8 quantization of the model (activations and weights) is applied. -- optim-level >= 0.3: unstructured pruning (sparsity) is applied to the weights of the model from 60% for 0.3 to 95% for 1.0 with linear scaling between. - Additionally, INT8 quantization of the model is applied. - -The default of 0.5 will result in a ~70% sparse model with INT8 quantization. diff --git a/docs/sparse-transfer-experiment-guide.md b/docs/sparse-transfer-experiment-guide.md new file mode 100644 index 00000000..830fb61b --- /dev/null +++ b/docs/sparse-transfer-experiment-guide.md @@ -0,0 +1,301 @@ + + + + + +# Sparsify Sparse-Transfer Experiment Guide + +## Overview +1. Sparse-Transfer Experiment Overview +2. Sparse-Transfer CLI Quickstart +3. Sparse-Transfer Cloud Quickstart +4. Next Steps +5. 
Resources
+
+
+#### Sparse-Transfer Experiments
+
+| Sparsity | Sparsification Speed | Accuracy  |
+|----------|----------------------|-----------|
+| **++++** | **++++**             | **+++++** |
+
+Sparse-Transfer Experiments are the second quickest way to create a faster and smaller model for your dataset.
+Sparse, foundational models are sparsified in a Training-Aware manner on a large dataset such as ImageNet.
+Then, the sparse patterns are transferred to your dataset through a fine-tuning process.
+
+Generally, Sparse-Transfer Experiments result in a 5–10x speedup with minimal accuracy loss.
+
+They are ideal when a sparse model already exists for your use case, and you want to quickly utilize it for your dataset.
+
+Note, the model argument is optional for Sparse-Transfer Experiments, as Sparsify will select the best one from the SparseZoo for your use case if not supplied.
+
+
+### Sparse-Transfer CLI Quickstart
+
+Now that you understand what a Sparse-Transfer Experiment is and the benefits, including fine-tuning a pre-optimized, sparse model on your data, you can now use the CLI to effectively run a Sparse-Transfer Experiment.
+
+Before you run a Sparse-Transfer Experiment, confirm you are logged into the Sparsify CLI. For instructions on Installation and Setup, review the [Sparsify Install and Setup Section](https://github.com/neuralmagic/sparsify#installation) in the Sparsify README.
+ +Sparse-Transfer Experiments use the following general command: + +```bash +sparsify.run sparse-transfer --use-case USE_CASE --model MODEL --data DATA --optim-level OPTIM_LEVEL +``` + +The values for each of the arguments follow these general rules: +- [**`USE_CASE`** ](#use_case) +- [**`MODEL`**](#model) (Optional) +- [**`DATA`**](#data) +- [**`OPTIM_LEVEL`**](#optim_level) + +#### USE_CASE + +The generally supported use cases for Sparsify are: + +- CV - classification: `cv-classification` +- CV - detection: `cv-detection` +- CV - segmentation: `cv-segmentation` +- NLP - question answering: `nlp-question_answering` +- NLP - text classification: `nlp-text_classification` +- NLP - sentiment analysis: `nlp-sentiment_analysis` +- NLP - token classification: `nlp-token_classification` +- NLP - named entity recognition: `nlp-named_entity_recognition` + +Note that other aliases are recognized for these use cases, such as image-classification for cv-classification. Sparsify will automatically recognize these aliases and apply the correct use case. + +For full details on Sparsify use cases, read the [Sparsify Use Cases Guide](https://github.com/neuralmagic/sparsify/blob/main/docs/use-cases-guide.md). + +#### MODEL + + +Models are optional for the Sparse-Transfer pathway. If no model is provided, the best model for the given optimization level will be used. + +If you choose to override the model argument, the PyTorch model format is the supported model format for Sparse-Transfer Experiments. The exact format will depend on the pipeline, and therefore the use case, for the Sparse-Transfer Experiment. + +#### DATA + +For all Sparsify Experiments, you will need to provide a dataset to create a sparse model. +Due to the varied ML pipelines and implementations, Sparsify standardizes on a few popular formats for datasets. +Confirm that your data is formatted properly according to the standards listed below. 
+ +##### Predefined Use Cases + +Sparse-Transfer Experiments utilize specific dataset standards depending on the use case. +Each one is listed below with an example. + +##### Image Classification + +For image classification tasks, Sparsify relies on the dataset format standard used by the PyTorch ImageFolder class. +This format is fairly simple and intuitive, and it is also widely used in the machine-learning community. + +##### Specifications + +- The root folder should contain subdirectories, each representing a single class of images. +- Images of a particular class/category should be placed inside the corresponding subdirectory. +- The subdirectory name is used as the class label and should be unique for each class. +- The images should be in a format readable by the Python Imaging Library (PIL), which includes formats such as .jpeg, .png, .bmp, etc. +- Images do not need to be of the same size. + +The PyTorch ImageFolder class automatically assigns numerical class labels to the images based on the lexicographical order of their class directories. +Therefore, it is crucial to ensure the directories are properly named to avoid any confusion or mislabeling. + +##### Image Classification Example + +For an image classification task involving dogs and cats, the dataset directory should be structured as follows: + +``` +root/dog/xxx.png +root/dog/xxy.png +root/dog/xxz.png + +root/cat/123.png +root/cat/nsa.png +root/cat/asd.png +``` + +In this example, all images within the 'dog' subdirectory will be labeled as 'dog', and all images within the 'cat' subdirectory will be labeled as 'cat'. +The exact filenames ('xxx.png', 'xxy.png', etc.) do not matter; what matters is the directory structure and the directory names. + +By organizing the data in this way, it can be easily read and labeled by the PyTorch ImageFolder class, and thus easily used for training image classification models in Sparsify. 
+ +Note that the class labels ('dog', 'cat') are case-sensitive and the order of the classes would be sorted lexicographically. +Here, 'cat' will be considered class 0, and 'dog' will be class 1, due to alphabetical order. + +##### Object Detection / Image Segmentation + +For object detection and image segmentation tasks, Sparsify supports the dataset format used by YOLOv5. +This format is specifically designed for tasks involving bounding boxes and segmentation masks and is widely adopted in the community. + +##### Specifications + +- Images should be stored in a common directory, generally named `images`. +- Annotations for the images should be stored in a separate directory, often named `labels`. +- Images can be in formats readable by OpenCV (e.g. .jpg, .png). +- Each image should have a corresponding annotation file. The annotation files should be in plain text format (.txt). +- The name of the annotation file should be the same as the corresponding image file, except with a .txt extension. +- Annotation files for object detection should contain one line for each object in the image. Each line should be in the format: ` `, where the values are normalized relative to the size of the image. +- Annotation files for image segmentation should contain information about the segmentation masks. + +##### Object Detection / Image Segmentation Example + +For an object detection task involving detecting cars and pedestrians, the dataset directory should be structured as follows: + +``` +dataset/ +├── images/ +│ ├── image1.jpg +│ └── image2.jpg +└── labels/ + ├── image1.txt + └── image2.txt +``` + +For `image1.jpg`, if there's a car and a pedestrian in the image, the corresponding `image1.txt` file could look like this: + +``` +0 0.5 0.6 0.2 0.3 +1 0.7 0.8 0.1 0.2 +``` + +This would mean that there is an object of class 0 (car) centered at (50% of the image width, 60% of the image height) and having a width of 20% of the image width and a height of 30% of the image height. 
+The second line is similar but for an object of class 1 (pedestrian). + +For image segmentation, the labels might be more complex, including segmentation masks that indicate which pixels belong to which object category. + +Make sure the class labels are consistent with what is expected by the YOLOv5 configuration you are using, and that the bounding box coordinates are normalized as described above. + +##### Natural Language (NLP/NLG) + +For natural language processing (NLP) and natural language generation (NLG) tasks, Sparsify supports the dataset formats used by the Hugging Face library. +Hugging Face datasets can be represented in various file formats, including JSON, CSV, and JSON lines format (.jsonl). + +##### Specifications + +- Each row or line in your data file should represent a single example. +- The data must include the features necessary for your task. For example, a dataset for text classification might include 'text' and 'label' fields. +- For JSON files, each line should be a separate, self-contained JSON object. +- For CSV files, the first row should include the column names, and each subsequent row should include the fields for a single example. +- The file should be UTF-8 encoded to support a wide range of text inputs. + +##### Natural Language (NLP/NLG) Example + +Here's an example of how you might structure a dataset for a sentiment analysis task: + +If you're using a JSON lines (.jsonl) format, your file could look like this: + +``` +{"text": "I love this movie!", "label": "positive"} +{"text": "This movie was awful.", "label": "negative"} +{"text": "I have mixed feelings about this film.", "label": "neutral"} +``` + +Each line is a separate JSON object, representing a single example. 
+ +If you're using a CSV format, your file could look like this: + +``` +text,label +"I love this movie!","positive" +"This movie was awful.","negative" +"I have mixed feelings about this film.","neutral" +``` + +The first row contains the column names, and each subsequent row represents a single example. + +Whether you choose to use JSON lines or CSV will depend on your specific needs and preferences, but either format will work well with Hugging Face and Sparsify. +Make sure your data is formatted correctly according to these specifications to ensure it can be used in your experiments. + +##### Custom Use Cases +Currently, custom use cases are not supported for dataset representation and datasets must conform to the definitions above. In the near future, these will be supported through plugin specifications. + +For full details on Sparsify datasets, read the [Sparsify Datasets Guide](https://github.com/neuralmagic/sparsify/blob/main/docs/datasets-guide.md#sparsify-datasets-guide). + +#### OPTIM_LEVEL + +When using Sparsify, the optim (sparsification) level is one of the top arguments you should decide on. Specifically, it controls how much sparsification is applied to your model with higher values resulting in faster and more compressed models. At the max range, though, you may see a drop in accuracy. + +The optim level can be set anywhere from 0.0 to 1.0, where 0.0 is for no sparsification and 1.0 is for maximum sparsification. +0.5 is the default optim level and is a good starting point for most use cases. + +##### Sparse-Transfer Optim Levels + +Sparse-Transfer optim_level mappings are unique since they map to models available in the SparseZoo to transfer from. Increasing the optim level will result in smaller and more compressed models. The specific mappings are the following: + +- optim-level == 0.0: the largest model selected from the SparseZoo with no optimizations. 
+- optim-level < 0.25: the largest model selected from the SparseZoo with INT8 quantization applied to the model (activations and weights). +- optim-level < 0.5: the largest model selected from the SparseZoo with both unstructured pruning (sparsity) and INT8 quantization applied to the model. +- optim-level < 0.75: the medium model selected from the SparseZoo with both unstructured pruning (sparsity) and INT8 quantization applied to the model. +- optim-level <= 1.0: the smallest model selected from the SparseZoo with both unstructured pruning (sparsity) and INT8 quantization applied to the model. + +The default of 0.5 will result in a medium-sized sparse model with INT8 quantization. + + +### Example Sparse-Transfer Experiment CLI Commands + +Here are code examples of Sparse-Transfer Experiments you may wish to run; pick your use case and start sparsifying with Sparse-Transfer! + +#### Running Sparse-Transfer Experiments + +##### Computer Vision Use Case: + +You have an image classification use case and want to run a Sparse-Transfer Experiment on the imagenette dataset. You don't care about the specific model architecture and just want to leverage SparseZoo's best-optimized model for classification and just apply your dataset to that model to create an accurate, highly performant model to accelerate inference. + +You are targeting a balanced model, but are targeting a pretty drastic 5-10x performance boost in latency while also maintaining the high accuracy of the model so that you can confidently deploy the model in production to solve your business case. + +You can use a Sparsify Sparse-Transfer Experiment to try and reach your goal. Sparse-Transfer Experiments use existing optimized models and apply your data to them to easily create fine-tuned optimized models. 
Since you want to very quickly achieve a 5-10x speedup in latency performance and are model agnostic, a Sparse-Transfer Experiment makes the most sense for you for its highly optimized, performant sparsity profile on your data.
+
+With all of these considerations in mind, run the following Sparse-Transfer Experiment command to achieve your use case goal:
+```bash
+sparsify.run sparse-transfer --use-case image_classification --data imagenette --optim-level 0.5
+```
+
+The output is as follows:
+
+MARK
+
+##### NLP Use Case:
+You are working on a text classification use case to help classify text reviews received from your customers through your e-commerce website. You have been having slow inference times using the BERT-base model and want to improve the performance to save cost.
+
+You are targeting a balanced model, but are targeting a pretty drastic 5-10x performance boost in text classification throughput while also maintaining the high accuracy of the model so that you can confidently deploy the model in production to solve your business case. You are focused on improving the throughput of the model to process more requests, faster. The model itself isn't as important as the performance for this text classification use case.
+
+You are targeting a balanced model in terms of wanting to get a 5-10x performance boost in throughput while having a high accuracy so your classifications are actionable.
+
+You can use a Sparsify Sparse-Transfer Experiment to try and reach your goal. Since you want to use the SST2 dataset and are model agnostic for this text classification use case, Sparsify will apply your data to a pre-optimized model behind the scenes. A Sparse-Transfer Experiment makes the most sense for you for a high sparsity profile and model-agnostic approach in transfer learning your data onto a pre-optimized model.
+ +With all of these considerations in mind, run the following Sparse-Transfer Experiment command to achieve your use case goal: + +```bash +sparsify.run sparse-transfer --use-case text_classification --data sst2 --optim-level 0.5 +``` +The output is as follows: +MARK + + +### Sparse-Transfer Cloud Quickstart + +In addition to manually creating commands, you use Sparsify Cloud to generate Sparsify Sparse-Transfer Experiment commands. + +To get started, read the [Sparsify Cloud User Guide](https://github.com/neuralmagic/sparsify/blob/main/docs/cloud-user-guide.md). + + +### Next Steps + +Now that you have successfully run a Sparse-Transfer Experiment, check out the [One-Shot](https://github.com/neuralmagic/sparsify/blob/main/docs/one-shot_experiment-guide.md) and [Training-Aware](https://github.com/neuralmagic/sparsify/blob/main/docs/training-aware_experiment-guide.md) Experiments to target different sparsity profiles. + + +### Resources +To learn more about Sparsify and all of the available pathways outside of Sparse-Transfer Experiments, refer to the [Sparsify README](https://github.com/neuralmagic/sparsify). diff --git a/docs/training-aware_experiment-guide.md b/docs/training-aware_experiment-guide.md new file mode 100644 index 00000000..1225547a --- /dev/null +++ b/docs/training-aware_experiment-guide.md @@ -0,0 +1,302 @@ + + + + + + +# Sparsify Training-Aware Experiment Guide + +## Overview +1. Training-Aware Experiment Overview +2. Training-Aware CLI Quickstart +3. Training-Aware Cloud Quickstart +4. Next Steps +5. Resources + + + +#### Training-Aware Experiments + +| Sparsity | Sparsification Speed | Accuracy | +|-----------|-----------------------|-----------| +| **+++++** | **++** | **+++++** | + +Training-Aware Experiments are the most accurate way to create a faster and smaller model for your dataset. +The algorithms are applied to the model during training, so they offer the best possible recovery of accuracy. 
+However, they do require additional training time and hyperparameter tuning to achieve the best results.
+
+Generally, Training-Aware Experiments result in a 6–12x speedup with minimal accuracy loss. They are ideal when you have the time to train a model, have a custom model, or want to achieve the best possible accuracy.
+
+
+### Training-Aware CLI Quickstart
+
+Now that you understand what a Training-Aware Experiment is and its benefits, including the best possible recovery of accuracy for an optimized model, you can use the CLI to effectively run a Training-Aware Experiment.
+
+Before you run a Training-Aware Experiment, confirm you are logged in to the Sparsify CLI. For instructions on Installation and Setup, review the [Sparsify Install and Setup Section](https://github.com/neuralmagic/sparsify#installation) in the Sparsify README.
+
+Training-Aware Experiments use the following general command:
+
+```bash
+sparsify.run training-aware --use-case USE_CASE --model MODEL --data DATA --optim-level OPTIM_LEVEL
+```
+
+The values for each of the arguments follow these general rules:
+- [**`USE_CASE`**](#use_case)
+- [**`MODEL`**](#model)
+- [**`DATA`**](#data)
+- [**`OPTIM_LEVEL`**](#optim_level)
+
+#### USE_CASE
+
+The generally supported use cases for Sparsify are:
+
+- CV - classification: `cv-classification`
+- CV - detection: `cv-detection`
+- CV - segmentation: `cv-segmentation`
+- NLP - question answering: `nlp-question_answering`
+- NLP - text classification: `nlp-text_classification`
+- NLP - sentiment analysis: `nlp-sentiment_analysis`
+- NLP - token classification: `nlp-token_classification`
+- NLP - named entity recognition: `nlp-named_entity_recognition`
+
+Note that other aliases are recognized for these use cases, such as image-classification for cv-classification. Sparsify will automatically recognize these aliases and apply the correct use case.
+
+For Training-Aware Experiments, custom use cases are only supported with the APIs for custom integrations.
This is because non-custom integrations utilize plugins that correspond to the appropriate use case for training pipelines. To utilize this, ensure that you have a training pipeline ready to go and inject the Sparsify API into the training pipeline with the desired use case passed in as an argument. More information on this specific pathway will be available in the near future as Sparsify's development progresses. + +For full details on Sparsify use cases, read the [Sparsify Use Cases Guide](https://github.com/neuralmagic/sparsify/blob/main/docs/use-cases-guide.md). + +#### MODEL + + +The PyTorch model format is the supported model format for Training-Aware Experiments. The exact format will depend on the pipeline, and therefore the use case, for the Training-Aware Experiment. + +#### DATA + +For all Sparsify Experiments, you will need to provide a dataset to create a sparse model. +Due to the varied ML pipelines and implementations, Sparsify standardizes on a few, popular formats for datasets. +Confirm that your data is formatted properly according to the standards listed below. + +##### Predefined Use Cases + +Training-Aware Experiments utilize specific dataset standards depending on the use case. +Each one is listed below with an example. + +##### Image Classification + +For image classification tasks, Sparsify relies on the dataset format standard used by the PyTorch ImageFolder class. +This format is fairly simple and intuitive, and it is also widely used in the machine-learning community. + +##### Specifications + +- The root folder should contain subdirectories, each representing a single class of images. +- Images of a particular class/category should be placed inside the corresponding subdirectory. +- The subdirectory name is used as the class label and should be unique for each class. +- The images should be in a format readable by the Python Imaging Library (PIL), which includes formats such as .jpeg, .png, .bmp, etc. 
+- Images do not need to be of the same size. + +The PyTorch ImageFolder class automatically assigns numerical class labels to the images based on the lexicographical order of their class directories. +Therefore, it is crucial to ensure the directories are properly named to avoid any confusion or mislabeling. + +##### Image Classification Example + +For an image classification task involving dogs and cats, the dataset directory should be structured as follows: + +``` +root/dog/xxx.png +root/dog/xxy.png +root/dog/xxz.png + +root/cat/123.png +root/cat/nsa.png +root/cat/asd.png +``` + +In this example, all images within the 'dog' subdirectory will be labeled as 'dog', and all images within the 'cat' subdirectory will be labeled as 'cat'. +The exact filenames ('xxx.png', 'xxy.png', etc.) do not matter; what matters is the directory structure and the directory names. + +By organizing the data in this way, it can be easily read and labeled by the PyTorch ImageFolder class, and thus easily used for training image classification models in Sparsify. + +Note that the class labels ('dog', 'cat') are case-sensitive and the order of the classes would be sorted lexicographically. +Here, 'cat' will be considered class 0, and 'dog' will be class 1, due to alphabetical order. + +##### Object Detection / Image Segmentation + +For object detection and image segmentation tasks, Sparsify supports the dataset format used by YOLOv5. +This format is specifically designed for tasks involving bounding boxes and segmentation masks and is widely adopted in the community. + +##### Specifications + +- Images should be stored in a common directory, generally named `images`. +- Annotations for the images should be stored in a separate directory, often named `labels`. +- Images can be in formats readable by OpenCV (e.g. .jpg, .png). +- Each image should have a corresponding annotation file. The annotation files should be in plain text format (.txt). 
+- The name of the annotation file should be the same as the corresponding image file, except with a .txt extension. +- Annotation files for object detection should contain one line for each object in the image. Each line should be in the format: ` `, where the values are normalized relative to the size of the image. +- Annotation files for image segmentation should contain information about the segmentation masks. + +##### Object Detection / Image Segmentation Example + +For an object detection task involving detecting cars and pedestrians, the dataset directory should be structured as follows: + +``` +dataset/ +├── images/ +│ ├── image1.jpg +│ └── image2.jpg +└── labels/ + ├── image1.txt + └── image2.txt +``` + +For `image1.jpg`, if there's a car and a pedestrian in the image, the corresponding `image1.txt` file could look like this: + +``` +0 0.5 0.6 0.2 0.3 +1 0.7 0.8 0.1 0.2 +``` + +This would mean that there is an object of class 0 (car) centered at (50% of the image width, 60% of the image height) and having a width of 20% of the image width and a height of 30% of the image height. +The second line is similar but for an object of class 1 (pedestrian). + +For image segmentation, the labels might be more complex, including segmentation masks that indicate which pixels belong to which object category. + +Make sure the class labels are consistent with what is expected by the YOLOv5 configuration you are using, and that the bounding box coordinates are normalized as described above. + +##### Natural Language (NLP/NLG) + +For natural language processing (NLP) and natural language generation (NLG) tasks, Sparsify supports the dataset formats used by the Hugging Face library. +Hugging Face datasets can be represented in various file formats including JSON, CSV, and JSON lines format (.jsonl). + +##### Specifications + +- Each row or line in your data file should represent a single example. +- The data must include the features necessary for your task. 
For example, a dataset for text classification might include 'text' and 'label' fields. +- For JSON files, each line should be a separate, self-contained JSON object. +- For CSV files, the first row should include the column names, and each subsequent row should include the fields for a single example. +- The file should be UTF-8 encoded to support a wide range of text inputs. + +##### Natural Language (NLP/NLG) Example + +Here is an example of how you might structure a dataset for a sentiment analysis task: + +If you're using a JSON lines (.jsonl) format, your file could look like this: + +``` +{"text": "I love this movie!", "label": "positive"} +{"text": "This movie was awful.", "label": "negative"} +{"text": "I have mixed feelings about this film.", "label": "neutral"} +``` + +Each line is a separate JSON object, representing a single example. + +If you are using a CSV format, your file could look like this: + +``` +text,label +"I love this movie!","positive" +"This movie was awful.","negative" +"I have mixed feelings about this film.","neutral" +``` + +The first row contains the column names, and each subsequent row represents a single example. + +Whether you choose to use JSON lines or CSV will depend on your specific needs and preferences, but either format will work well with Hugging Face and Sparsify. +Make sure your data is formatted correctly according to these specifications to ensure it can be used in your experiments. + +##### Custom Use Cases +Currently, custom use cases are not supported for dataset representation and datasets must conform to the definitions above. In the near future, these will be supported through plugin specifications. + +For full details on Sparsify datasets, read the [Sparsify Datasets Guide](https://github.com/neuralmagic/sparsify/blob/main/docs/datasets-guide.md#sparsify-datasets-guide). + +#### OPTIM_LEVEL + +When using Sparsify, the optim (sparsification) level is one of the top arguments you should decide on. 
Specifically, it controls how much sparsification is applied to your model with higher values resulting in faster and more compressed models. At the max range, though, you may see a drop in accuracy. + +The optim level can be set anywhere from 0.0 to 1.0, where 0.0 is for no sparsification and 1.0 is for maximum sparsification. +0.5 is the default optim level and is a good starting point for most use cases. + +##### Training-Aware Optim Levels + + +Given that Training-Aware is applied while training, the sparsity ranges are increased as compared to one shot since accuracy recovery is easier at higher sparsities. + +The specific ranges are the following: + +- optim-level == 0.0: no sparsification is applied and the input model is returned as a baseline test case. +- optim-level < 0.3: INT8 quantization of the model (activations and weights) is applied. +- optim-level >= 0.3: unstructured pruning (sparsity) is applied to the weights of the model from 60% for 0.3 to 95% for 1.0 with linear scaling between. + Additionally, INT8 quantization of the model is applied. + +The default of 0.5 will result in a ~70% sparse model with INT8 quantization. + + +### Example Training-Aware Experiment CLI Commands + +Here are code examples of Training-Aware Experiments you may wish to run; pick your use case and start sparsifying with Training-Aware! + +#### Running Training-Aware Experiments + +##### Computer Vision Use Case: + +You have an image classification use case and want to run a Training-Aware Experiment on a dense ResNet-50 model using the imagenette dataset. You want to ensure you have the most sparsity to get the best possible performance and maintain a high level of accuracy. + +You are targeting a balanced model in terms of wanting to get a 6-12x performance boost in latency while also maintaining the high accuracy of the model so that you can confidently deploy the model in production to solve your business case. 
+ +You can use a Sparsify Training-Aware Experiment to try and reach your goal. Training-Aware Experiments apply SOTA optimization techniques during training to generate a highly optimized sparse model with very little to no impact on accuracy. Since you want to get the most possible performance speedup in latency and need a high level of accuracy, a Training-Aware Experiment makes the most sense for you for its highly optimized, performant sparsity profile as well as high accuracy profile. + +With all of these considerations in mind, run the following Training-Aware Experiment command to achieve your use case goal: +```bash +sparsify.run training-aware --use-case image_classification --model resnet50 --data imagenette --optim-level 0.5 +``` + + +The output is as follows: + +MARK + +##### NLP Use Case: +You are working on a text classification use case to help classify text reviews received from your customers through your e-commerce website. You have been having slow inference times using the BERT-base model, but have an accurate model that you want to ensure does not take a large hit. + +You are targeting a balanced model, but are targeting a significant 6-12x performance boost in text classification throughput while also maintaining the highest level of accuracy with the model so that you can confidently deploy the model in production to solve your business case. You are focused on improving the throughput of the model to process more requests, but sacrifice as few points in accuracy as possible. + +You are targeting a balanced model in terms of wanting to get a 6-12x performance boost in throughput while losing little to no accuracy so your classifications are actionable. + +You can use a Sparsify Training-Aware Experiment to try and reach your goal. 
Since you want to use the SST2 dataset on BERT-base to get the highest performing model with the lowest accuracy hit, a Training-Aware Experiment makes the most sense for you for its highly optimized, performant sparsity profile as well as high accuracy profile.
+
+With all of these considerations in mind, run the following Training-Aware Experiment command to achieve your use case goal:
+
+```bash
+sparsify.run training-aware --use-case text_classification --model bert-base --data sst2 --optim-level 0.5
+```
+The output is as follows:
+MARK
+
+
+### Training-Aware Cloud Quickstart
+
+In addition to manually creating commands, you can use Sparsify Cloud to generate Sparsify Training-Aware Experiment commands.
+
+To get started, read the [Sparsify Cloud User Guide](https://github.com/neuralmagic/sparsify/blob/main/docs/cloud-user-guide.md).
+
+
+### Next Steps
+
+Now that you have successfully run a Training-Aware Experiment, check out the [One-Shot](https://github.com/neuralmagic/sparsify/blob/main/docs/one-shot_experiment-guide.md) and [Sparse-Transfer](https://github.com/neuralmagic/sparsify/blob/main/docs/sparse-transfer_experiment-guide.md) Experiments to target different sparsity profiles.
+
+
+### Resources
+To learn more about Sparsify and all of the available pathways outside of Training-Aware Experiments, refer to the [Sparsify README](https://github.com/neuralmagic/sparsify).
From 2f3237930122affc16fe9a11228d1c097955105d Mon Sep 17 00:00:00 2001 From: Benjamin Fineran Date: Thu, 13 Jul 2023 12:33:02 -0400 Subject: [PATCH 32/47] [run] add validation for optim level (#262) --- src/sparsify/cli/run.py | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/src/sparsify/cli/run.py b/src/sparsify/cli/run.py index 326de945..b1d28d4e 100644 --- a/src/sparsify/cli/run.py +++ b/src/sparsify/cli/run.py @@ -43,6 +43,8 @@ def one_shot(**kwargs): """ One shot sparsification of ONNX models """ + kwargs["optim_level"] = _validate_optim_level(kwargs.get("optim_level")) + # raises exception if sparsifyml not installed from sparsify.one_shot import one_shot @@ -76,6 +78,8 @@ def sparse_transfer(**kwargs): """ Run sparse transfer learning for a use case against a supported task and model """ + kwargs["optim_level"] = _validate_optim_level(kwargs.get("optim_level")) + from sparsify import auto auto_checks() @@ -95,6 +99,8 @@ def training_aware(**kwargs): """ Run training aware sparsification for a use case against a supported task and model """ + kwargs["optim_level"] = _validate_optim_level(kwargs.get("optim_level")) + from sparsify import auto auto_checks() @@ -131,6 +137,26 @@ def _parse_run_args_to_auto(sparse_transfer: bool, **kwargs): ) +def _validate_optim_level(optim_level: float) -> float: + """ + :param optim_level: cli ingested optim_level + :return: optim level scaled from 0-1 + :raises ValueError: for any values that are not float 0-1 or an integer 1-100 + """ + # optim level should always be defaulted by the CLI, asserting here for safety + assert optim_level is not None + + if 0 <= optim_level <= 1: + return optim_level + elif (1 < optim_level <= 100) and optim_level == int(optim_level): + return optim_level / 100.0 + else: + raise ValueError( + "optim-level must be a float value between 0-1 or an integer value " + f"between 0-100. 
Found {optim_level}" + ) + + def _maybe_unwrap_zoo_stub(model_path: str) -> str: if model_path.startswith("zoo:"): return Model(model_path).onnx_model.path From 3ed9320e2c0bb3e60802aa7d7fbbced97636f476 Mon Sep 17 00:00:00 2001 From: Benjamin Fineran Date: Thu, 13 Jul 2023 12:33:49 -0400 Subject: [PATCH 33/47] fix quality on main (readme copyright) (#263) --- README.md | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/README.md b/README.md index 4e19c47a..da6c8620 100644 --- a/README.md +++ b/README.md @@ -15,6 +15,22 @@ See the License for the specific language governing permissions and limitations under the License. --> + +

Sparsify tool icon  Sparsify [Alpha]

ML model optimization product to accelerate inference

From df15b07d32ccba215fad4786b694582aa57af706 Mon Sep 17 00:00:00 2001 From: Dipika Sikka Date: Fri, 14 Jul 2023 15:38:57 -0400 Subject: [PATCH 34/47] for imagefolder datasets, check if the datafolder exists (#264) --- src/sparsify/auto/tasks/image_classification/runner.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/src/sparsify/auto/tasks/image_classification/runner.py b/src/sparsify/auto/tasks/image_classification/runner.py index b7bd486e..7fcc30af 100644 --- a/src/sparsify/auto/tasks/image_classification/runner.py +++ b/src/sparsify/auto/tasks/image_classification/runner.py @@ -77,6 +77,12 @@ def config_to_args( if "dataset" not in config.kwargs: # custom datasets are set to imagefolder config.kwargs["dataset"] = "imagefolder" + if not os.path.exists(config.dataset): + raise FileNotFoundError( + f"The custom dataset {config.dataset} " + "does not exist. Please ensure that the path provided is correct." + ) + if "model_tag" not in config.kwargs: config.kwargs["model_tag"] = "sparsify_auto_image_classification" train_args = ImageClassificationTrainArgs( From 532a755bde1bec846d8e86608f30748ede80a38b Mon Sep 17 00:00:00 2001 From: Dipika Sikka Date: Wed, 19 Jul 2023 10:12:24 -0400 Subject: [PATCH 35/47] update yolov5 yaml build to handle 3rd case (#266) --- src/sparsify/auto/utils/helpers.py | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/src/sparsify/auto/utils/helpers.py b/src/sparsify/auto/utils/helpers.py index 87a516f4..d371624e 100644 --- a/src/sparsify/auto/utils/helpers.py +++ b/src/sparsify/auto/utils/helpers.py @@ -15,6 +15,7 @@ """ Generic helpers for sparsify.auto """ +import glob import logging import os import re @@ -51,9 +52,10 @@ def initialize_banner_logger(): def create_yolo_data_yaml(dataset: str) -> str: """ - Check if the dataset provided is a data directory. If it is, buid a yolov5 yaml - file based on the provided data directory path. 
An example of the directory - structure for the provided directory path is shown below. There must + Check if the dataset provided is a data directory. If it is, check if there is + a yaml file within the directory and return the path to the yaml. If not, build + a yolov5 yaml file based on the provided data directory path. An example of the + directory structure for the provided directory path is shown below. There must subdirectories in the provided directory named `images`, `labels` and a text file called `classes.txt` which includes the list of the classes for the particular dataset, ordered by class id. The `images` and `labels` folders @@ -92,9 +94,16 @@ def _check_and_update_file(file_type: str, path: str): else: data_file_args[file_type] = [path] + # Case where the user provides just a yaml file path if not os.path.isdir(dataset): return dataset + # Case where the user provides a data directory with a yaml file + # Only one will be returned if multiple are provided + yaml_paths = glob.glob(f"{dataset}/*.y*ml") + if len(yaml_paths) > 0: + return yaml_paths[0] + image_path = os.path.join(dataset, image_dir) class_list_path = os.path.join(dataset, class_path) From 8d0fe182385c7df12a33e911572309da5bee3a74 Mon Sep 17 00:00:00 2001 From: Dipika Sikka Date: Wed, 19 Jul 2023 10:48:12 -0400 Subject: [PATCH 36/47] Update `training_aware` to produce expected outputs (#265) * update readme.txt to be README.md and yolov5 to produce model.onnx * move change to yolov5 repo --- src/sparsify/auto/tasks/runner.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/sparsify/auto/tasks/runner.py b/src/sparsify/auto/tasks/runner.py index 47d369cb..e60cc815 100644 --- a/src/sparsify/auto/tasks/runner.py +++ b/src/sparsify/auto/tasks/runner.py @@ -412,7 +412,7 @@ def create_deployment_directory(self, train_directory: str, deploy_directory: st _LOGGER.info("Deleting %s" % origin_directory) shutil.rmtree(origin_directory) - with open(os.path.join(deploy_directory, 
"readme.txt"), "x") as f: + with open(os.path.join(deploy_directory, "README.md"), "x") as f: f.write("deployment instructions will go here") _LOGGER.info("Deployment directory moved to %s" % deploy_directory) From c3f8bb29be6a57649e2d3c7dbac9f23f63c7aca0 Mon Sep 17 00:00:00 2001 From: Dipika Sikka Date: Wed, 19 Jul 2023 15:26:06 -0400 Subject: [PATCH 37/47] Add deployment instructions to `README.md` (#267) * update readme.txt to be README.md and yolov5 to produce model.onnx * move change to yolov5 repo * add deployment instructions * add instructions --- MANIFEST.in | 1 + .../auto/tasks/deployment_instructions.md | 82 +++++++++++++++++++ src/sparsify/auto/tasks/runner.py | 9 +- 3 files changed, 88 insertions(+), 4 deletions(-) create mode 100644 src/sparsify/auto/tasks/deployment_instructions.md diff --git a/MANIFEST.in b/MANIFEST.in index aafd306d..42d20225 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -1,2 +1,3 @@ recursive-include src/sparsify/ui/ * include LICENSE +include src/sparsify/auto/tasks/deployment_instructions.md diff --git a/src/sparsify/auto/tasks/deployment_instructions.md b/src/sparsify/auto/tasks/deployment_instructions.md new file mode 100644 index 00000000..f8203cd5 --- /dev/null +++ b/src/sparsify/auto/tasks/deployment_instructions.md @@ -0,0 +1,82 @@ +# Deployment Guide +​ +We recommend deploying with the [DeepSparse](https://github.com/neuralmagic/deepsparse) Engine for the best performance with sparsified models on CPUs. +The deployment folder contains everything necessary to benchmark and deploy a sparsified model with the DeepSparse Engine. +​ +## Requirements +​ +A Linux-based CPU system with Python versions 3.8-3.10 installed and AVX2 or greater instruction sets is required to run the DeepSparse Engine. +The DeepSparse Engine is not currently supported on Windows or MacOS. 
+To install DeepSparse, its dependencies, and check your system, run the following commands: +​ +```bash +pip install deepsparse +deepsparse.check_hardware +``` +​ +Other installation options may be needed, depending on your use case. +For more details and other installation options, see the [Installation Guide](https://github.com/neuralmagic/deepsparse). +​ +For more information on hardware support and system requirements, see the [Support and Requirements Guide](https://github.com/neuralmagic/deepsparse). +​ +## Benchmarking +​ +The `deepsparse.benchmark` command enables benchmarking of an ONNX model on your system. +The command takes a model path as a minimum argument and will run the model through a series of inference runs using random data. +For example: +​ +```bash +deepsparse.benchmark model.onnx +``` +​ +For more information on the `deepsparse.benchmark` command, see the [Benchmarking Guide](https://github.com/neuralmagic/deepsparse/blob/main/docs/user-guide/deepsparse-benchmarking.md). +​ +## Pipeline Deployments +​ +DeepSparse contains many pipeline deployments for different use cases. +These pipelines package up the model inference and any pre- and post-processing steps into a single, optimized callable for deployment. +Additionally, custom pipelines are supported. 
+For example, a sample custom pipeline for ImageNet is provided below: +​ +```python +from deepsparse.pipelines.custom_pipeline import CustomTaskPipeline +from torchvision import transforms +from PIL import Image +import torch +​ +preprocess_transforms = transforms.Compose([ + transforms.Resize(256), + transforms.CenterCrop(224), + transforms.ToTensor(), + transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]), +]) +​ +def preprocess(img_file): + with open(img_file, "rb") as img_file: + img = Image.open(img_file) + img = img.convert("RGB") + img = preprocess_transforms(img) + batch = torch.stack([img]) + return [batch.numpy()] +​ +custom_pipeline = CustomTaskPipeline( + model_path="zoo:cv/classification/resnet_v1-50/pytorch/sparseml/imagenet/pruned90_quant-none", + process_inputs_fn=preprocess, +) +​ +scores, probs = custom_pipeline("goldfish.jpg") +``` +​ +For more information on the available pipelines and how to create custom pipelines, see the [Pipeline Deployment Guide](https://github.com/neuralmagic/deepsparse/blob/main/docs/user-guide/deepsparse-benchmarking.md). +​ +## Server Deployments +​ +DeepSparse additionally contains a performant server deployment for different use cases. +The server deployment packages up the model inference and any pre- and post-processing steps into a single, optimized HTTP request for deployment. +To start the server, run the following command with the appropriate arguments: +​ +```bash +deepsparse.server --task TASK --model_path ./deployment +``` +​ +For more information on the `deepsparse.server` command, see the [Server Deployment Guide](zoo:cv/classification/resnet_v1-50/pytorch/sparseml/imagenet/pruned90_quant-none). 
\ No newline at end of file diff --git a/src/sparsify/auto/tasks/runner.py b/src/sparsify/auto/tasks/runner.py index e60cc815..97d465d6 100644 --- a/src/sparsify/auto/tasks/runner.py +++ b/src/sparsify/auto/tasks/runner.py @@ -12,11 +12,11 @@ # See the License for the specific language governing permissions and # limitations under the License. - import gc import json import logging import os +import pkgutil import shutil import socket import warnings @@ -153,7 +153,6 @@ def __init__(self, config: SparsificationTrainingConfig): self.dashed_cli_kwargs = False # True if CLI args require "-" as word separator self.train_args, self.export_args = self.config_to_args(self.config) - self.hardware_specs = analyze_hardware() self.tune_args_for_hardware(self.hardware_specs) @@ -412,8 +411,10 @@ def create_deployment_directory(self, train_directory: str, deploy_directory: st _LOGGER.info("Deleting %s" % origin_directory) shutil.rmtree(origin_directory) - with open(os.path.join(deploy_directory, "README.md"), "x") as f: - f.write("deployment instructions will go here") + readme_path = os.path.join(deploy_directory, "README.md") + instruc = pkgutil.get_data("sparsify.auto", "tasks/deployment_instructions.md") + with open(readme_path, "wb") as f: + f.write(instruc) _LOGGER.info("Deployment directory moved to %s" % deploy_directory) @abstractmethod From 5f92d58513cae2ce140da0aea9cd3a91e49c0e5d Mon Sep 17 00:00:00 2001 From: Rahul Tuli Date: Thu, 20 Jul 2023 11:58:06 -0400 Subject: [PATCH 38/47] Update deployment instructions to install deepsparse[server] (#268) * Update deployment instructions to install deepsparse[server] * Add download link for buddy.jpeg --- src/sparsify/auto/tasks/deployment_instructions.md | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/sparsify/auto/tasks/deployment_instructions.md b/src/sparsify/auto/tasks/deployment_instructions.md index f8203cd5..5ac9b841 100644 --- a/src/sparsify/auto/tasks/deployment_instructions.md +++ 
b/src/sparsify/auto/tasks/deployment_instructions.md @@ -10,7 +10,7 @@ The DeepSparse Engine is not currently supported on Windows or MacOS. To install DeepSparse, its dependencies, and check your system, run the following commands: ​ ```bash -pip install deepsparse +pip install deepsparse[server] deepsparse.check_hardware ``` ​ @@ -64,8 +64,10 @@ custom_pipeline = CustomTaskPipeline( process_inputs_fn=preprocess, ) ​ -scores, probs = custom_pipeline("goldfish.jpg") +scores, probs = custom_pipeline("buddy.jpeg") ``` +(Note: Download [buddy.jpeg](https://github.com/neuralmagic/deepsparse/blob/main/tests/deepsparse/pipelines/sample_images/buddy.jpeg)) + ​ For more information on the available pipelines and how to create custom pipelines, see the [Pipeline Deployment Guide](https://github.com/neuralmagic/deepsparse/blob/main/docs/user-guide/deepsparse-benchmarking.md). ​ From f04793f4d88a639a46408c3f21176cf632d92923 Mon Sep 17 00:00:00 2001 From: Jeannie Finks <74554921+jeanniefinks@users.noreply.github.com> Date: Thu, 20 Jul 2023 13:01:22 -0400 Subject: [PATCH 39/47] Update deployment_instructions.md Corrected DeepSparse product name and misc grammar nits. cc @rahul-tuli --- .../auto/tasks/deployment_instructions.md | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/src/sparsify/auto/tasks/deployment_instructions.md b/src/sparsify/auto/tasks/deployment_instructions.md index 5ac9b841..c7d7a766 100644 --- a/src/sparsify/auto/tasks/deployment_instructions.md +++ b/src/sparsify/auto/tasks/deployment_instructions.md @@ -1,12 +1,12 @@ -# Deployment Guide +# Sparsify Deployment Guide ​ -We recommend deploying with the [DeepSparse](https://github.com/neuralmagic/deepsparse) Engine for the best performance with sparsified models on CPUs. -The deployment folder contains everything necessary to benchmark and deploy a sparsified model with the DeepSparse Engine. 
+Deploying with Neural Magic's inference runtime, [DeepSparse](https://github.com/neuralmagic/deepsparse), is recommended for the best performance with sparsified models on CPUs. +The deployment folder contains everything necessary to benchmark and deploy a sparsified model with DeepSparse. ​ ## Requirements ​ -A Linux-based CPU system with Python versions 3.8-3.10 installed and AVX2 or greater instruction sets is required to run the DeepSparse Engine. -The DeepSparse Engine is not currently supported on Windows or MacOS. +A Linux-based CPU system with Python versions 3.8-3.10 installed and AVX2 or greater instruction set is required to run DeepSparse. +DeepSparse is not currently supported on Windows or MacOS. To install DeepSparse, its dependencies, and check your system, run the following commands: ​ ```bash @@ -16,8 +16,8 @@ deepsparse.check_hardware ​ Other installation options may be needed, depending on your use case. For more details and other installation options, see the [Installation Guide](https://github.com/neuralmagic/deepsparse). -​ -For more information on hardware support and system requirements, see the [Support and Requirements Guide](https://github.com/neuralmagic/deepsparse). + +For the latest hardware support and system requirements, see the [Support and Requirements Guide](https://github.com/neuralmagic/deepsparse). ​ ## Benchmarking ​ @@ -81,4 +81,4 @@ To start the server, run the following command with the appropriate arguments: deepsparse.server --task TASK --model_path ./deployment ``` ​ -For more information on the `deepsparse.server` command, see the [Server Deployment Guide](zoo:cv/classification/resnet_v1-50/pytorch/sparseml/imagenet/pruned90_quant-none). \ No newline at end of file +For more information on the `deepsparse.server` command, see the [Server Deployment Guide](zoo:cv/classification/resnet_v1-50/pytorch/sparseml/imagenet/pruned90_quant-none). 
From dc3456b2ba3f6b7c2f80f566585ed7428dd7cfb7 Mon Sep 17 00:00:00 2001 From: Jeannie Finks <74554921+jeanniefinks@users.noreply.github.com> Date: Thu, 20 Jul 2023 13:03:51 -0400 Subject: [PATCH 40/47] Update deployment_instructions.md Corrected incorrect link to DeepSparse Server Guide. cc @rahul-tuli --- src/sparsify/auto/tasks/deployment_instructions.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/sparsify/auto/tasks/deployment_instructions.md b/src/sparsify/auto/tasks/deployment_instructions.md index c7d7a766..34866ace 100644 --- a/src/sparsify/auto/tasks/deployment_instructions.md +++ b/src/sparsify/auto/tasks/deployment_instructions.md @@ -81,4 +81,4 @@ To start the server, run the following command with the appropriate arguments: deepsparse.server --task TASK --model_path ./deployment ``` ​ -For more information on the `deepsparse.server` command, see the [Server Deployment Guide](zoo:cv/classification/resnet_v1-50/pytorch/sparseml/imagenet/pruned90_quant-none). +For more information on the `deepsparse.server` command, see the [Server Deployment Guide](https://github.com/neuralmagic/deepsparse/blob/main/docs/user-guide/deepsparse-server.md). 
From 73d6fc871456ea8f9fb42addc2f98936f0394edd Mon Sep 17 00:00:00 2001 From: Mark Kurtz Date: Thu, 20 Jul 2023 14:39:43 -0400 Subject: [PATCH 41/47] Docs update part2 (#269) * Update README.md minor typo fixes * Update README.md - added numbers to TOC to match the numbered subheaders - grammar nits * add in updated docs --------- Co-authored-by: Jeannie Finks <74554921+jeanniefinks@users.noreply.github.com> --- README.md | 55 ++-- docs/datasets-guide.md | 276 ++++++++++++-------- docs/models-guide.md | 83 +++++- docs/one-shot-experiment-guide.md | 146 +++++++++++ docs/one-shot_experiment-guide.md | 251 ------------------ docs/sparse-transfer-experiment-guide.md | 316 +++++++---------------- docs/training-aware-experiment-guide.md | 158 ++++++++++++ docs/training-aware_experiment-guide.md | 302 ---------------------- docs/use-cases-guide.md | 57 ---- 9 files changed, 667 insertions(+), 977 deletions(-) create mode 100644 docs/one-shot-experiment-guide.md delete mode 100644 docs/one-shot_experiment-guide.md create mode 100644 docs/training-aware-experiment-guide.md delete mode 100644 docs/training-aware_experiment-guide.md delete mode 100644 docs/use-cases-guide.md diff --git a/README.md b/README.md index da6c8620..b2a3b0cb 100644 --- a/README.md +++ b/README.md @@ -80,14 +80,13 @@ Sparsify empowers you to compress models through two components: - **Sparsify CLI/API** - a Python package and GitHub repository that allows you to run Sparsify Experiments locally, sync with the Sparsify Cloud, and integrate them into your workflows. ## Table of Contents - - [Quickstart Guide](#quickstart-guide) - - [Install and Setup](#1-install-and-setup) - - [Run an Experiment](#2-run-an-experiment) - - [Compare Results](#3-compare-results) - - [Deploy a Model](#4-deploy-a-model) -- [Companion Guides](#companion-guides) -- [Resources](#resources) + - [1. Install and Setup](#1-install-and-setup) + - [2. Run an Experiment](#2-run-an-experiment) + - [3. 
Compare Results](#3-compare-results) + - [4. Deploy a Model](#4-deploy-a-model) +- [Companion Guides](#companion-guides) +- [Resources](#resources) ## Quickstart Guide @@ -116,12 +115,14 @@ First, verify that you have the correct software and hardware to run the Sparsif Sparsify is tested on Python 3.8 and 3.10, ONNX 1.5.0-1.12.0, ONNX opset version 11+, and manylinux compliant systems. Sparsify is not supported natively on Windows and MAC OS. + +Additionally, for installation from PyPi, pip 20.3+ is required.
Hardware -Sparsify requires a GPU with CUDA + CuDNN in order to sparsify neural networks. +Sparsify requires a GPU with CUDA + CuDNN in order to sparsify neural networks. We recommend you use a Linux system with a GPU that has a minimum of 16GB of GPU Memory, 128GB of RAM, 4 CPU cores, and is CUDA-enabled. If you are sparsifying a very large model, you may need more RAM than the recommended 128GB. If you encounter issues setting up your training environment, [file a GitHub issue](https://github.com/neuralmagic/sparsify/issues). @@ -129,8 +130,8 @@ If you encounter issues setting up your training environment, [file a GitHub iss #### 1.2 Create an Account -Creating a new one-time account is simple and free. -An account is required to manage your Experiments and API keys. +Creating a new one-time account is simple and free. +An account is required to manage your Experiments and API keys. Visit the [Neural Magic's Web App Platform](https://account.neuralmagic.com/signup) and create an account by entering your email, name, and unique password. If you already have a Neural Magic Account, [sign in](https://account.neuralmagic.com/signin) with your email. @@ -146,7 +147,7 @@ Install with pip using: pip install sparsify-nightly ``` -#### 1.4 Login via CLI +#### 1.4 Log in via CLI Next, with Sparsify installed on your training hardware: 1. Authorize the local CLI to access your account by running the sparsify.login command and providing your API key. 
@@ -182,14 +183,14 @@ To run a One-Shot Experiment for your model, dataset, and use case, use the foll sparsify.run one-shot --use-case USE_CASE --model MODEL --data DATASET --optim-level OPTIM_LEVEL ``` -For example, to sparsify a ResNet50 model on the ImageNet dataset for image classification, run the following commands: +For example, to sparsify a ResNet-50 model on the ImageNet dataset for image classification, run the following commands: ```bash wget https://public.neuralmagic.com/datasets/cv/classification/imagenet_calibration.tar.gz tar -xzf imagenet_calibration.tar.gz sparsify.run one-shot --use-case image_classification --model "zoo:cv/classification/resnet_v1-50/pytorch/sparseml/imagenet/base-none" --data ./imagenet_calibration --optim-level 0.5 ``` -Or, to sparsify a BERT model on the SST-2 dataset for sentiment analysis, run the following commands: +Or, to sparsify a BERT model on the SST2 dataset for sentiment analysis, run the following commands: ```bash wget https://public.neuralmagic.com/datasets/nlp/text_classification/sst2_calibration.tar.gz tar -xzf sst2_calibration.tar.gz @@ -199,7 +200,7 @@ sparsify.run one-shot --use-case text_classification --model "zoo:nlp/sentiment_ To dive deeper into One-Shot Experiments, read through the [One-Shot Experiment Guide](https://github.com/neuralmagic/sparsify/blob/main/docs/one-shot_experiment-guide.md). -Note, One-Shot Experiments currently require the model to be in an ONNX format and the dataset to be in a Numpy format. +Note, One-Shot Experiments currently require the model to be in an ONNX format and the dataset to be in a NumPy format. More details are provided in the One-Shot Experiment Guide. @@ -209,19 +210,19 @@ More details are provided in the One-Shot Experiment Guide. 
|----------|----------------------|-----------| | **++++** | **++++** | **+++++** | -Sparse-Transfer Experiments quickly create a smaller and faster model for your dataset by transferring from a [SparseZoo](https://sparsezoo.neuralmagic.com/) pre-sparsified foundational model o, providing a 5-10x speedup with minimal accuracy loss, ideal for quick model optimization without retraining your model. +Sparse-Transfer Experiments quickly create a smaller and faster model for your dataset by transferring from a [SparseZoo](https://sparsezoo.neuralmagic.com/) pre-sparsified foundational model, providing a 5-10x speedup with minimal accuracy loss, ideal for quick model optimization without retraining your model. To run a Sparse-Transfer Experiment for your model (optional), dataset, and use case, run the following command: ```bash sparsify.run sparse-transfer --use-case USE_CASE --model OPTIONAL_MODEL --data DATASET --optim-level OPTIM_LEVEL ``` -For example, to sparse transfer a SparseZoo model to the ImageNette dataset for image classification, run the following command: +For example, to sparse transfer a SparseZoo model to the Imagenette dataset for image classification, run the following command: ```bash sparsify.run sparse-transfer --use-case image_classification --data imagenette --optim-level 0.5 ``` -Or, to sparse transfer a SparseZoo model to the SST-2 dataset for sentiment analysis, run the following command: +Or, to sparse transfer a SparseZoo model to the SST2 dataset for sentiment analysis, run the following command: ```bash sparsify.run sparse-transfer --use-case text_classification --data sst2 --optim-level 0.5 ``` @@ -229,7 +230,7 @@ sparsify.run sparse-transfer --use-case text_classification --data sst2 --optim- To dive deeper into Sparse-Transfer Experiments, read through the [Sparse-Transfer Experiment Guide](https://github.com/neuralmagic/sparsify/blob/main/docs/sparse-transfer_experiment-guide.md). 
-Note, Sparse-Transfer Experiments require the model to be saved in a PyTorch format corresponding to the underlying integration such as Ultralytics YOLOv5 or HuggingFace Transformers. +Note, Sparse-Transfer Experiments require the model to be saved in a PyTorch format corresponding to the underlying integration such as Ultralytics YOLOv5 or Hugging Face Transformers. Datasets must additionally match the expected format of the underlying integration. More details and exact formats are provided in the Sparse-Transfer Experiment Guide. @@ -247,12 +248,12 @@ To run a Training-Aware Experiment for your model, dataset, and use case, run th sparsify.run training-aware --use-case USE_CASE --model OPTIONAL_MODEL --data DATASET --optim-level OPTIM_LEVEL ``` -For example, to sparsify a ResNet50 model on the ImageNette dataset for image classification, run the following command: +For example, to sparsify a ResNet-50 model on the Imagenette dataset for image classification, run the following command: ```bash sparsify.run training-aware --use-case image_classification --model "zoo:cv/classification/resnet_v1-50/pytorch/sparseml/imagenette/base-none" --data imagenette --optim-level 0.5 ``` -Or, to sparsify a BERT model on the SST-2 dataset for sentiment analysis, run the following command: +Or, to sparsify a BERT model on the SST2 dataset for sentiment analysis, run the following command: ```bash sparsify.run training-aware --use-case text_classification --model "zoo:nlp/sentiment_analysis/bert-base/pytorch/huggingface/sst2/base-none" --data sst2 --optim-level 0.5 ``` @@ -260,7 +261,7 @@ sparsify.run training-aware --use-case text_classification --model "zoo:nlp/sent To dive deeper into Training-Aware Experiments, read through the [Training-Aware Experiment Guide](https://github.com/neuralmagic/sparsify/blob/main/docs/training-aware_experiment-guide.md). 
-Note, Training-Aware Experiments require the model to be saved in a PyTorch format corresponding to the underlying integration such as Ultralytics YOLOv5 or HuggingFace Transformers. +Note that Training-Aware Experiments require the model to be saved in a PyTorch format corresponding to the underlying integration such as Ultralytics YOLOv5 or Hugging Face Transformers. Datasets must additionally match the expected format of the underlying integration. More details and exact formats are provided in the Training-Aware Experiment Guide. @@ -284,7 +285,7 @@ You can compare the accuracy by looking through the metrics printed out to the c Additionally, you can use [DeepSparse](https://github.com/neuralmagic/deepsparse) to compare the inference performance on your CPU deployment hardware. -Note: In the near future, you will be able to visualize the results in the Cloud, simulate other scenarios and hyperparameters, compare the results to other Experiments, and package for your deployment scenario. +Note: In the near future, you will be able to visualize the results in Sparsify Cloud, simulate other scenarios and hyperparameters, compare the results to other Experiments, and package for your deployment scenario. To run a benchmark on your deployment hardware, use the `deepsparse.benchmark` command with your original model and the new optimized model. @@ -354,9 +355,11 @@ If you're not ready for deploying, congratulations on completing the quickstart! 
## Companion Guides - [Sparsify Cloud User Guide](https://github.com/neuralmagic/sparsify/blob/main/docs/cloud-user-guide.md) -- [Sparsify Use Cases Guide](https://github.com/neuralmagic/sparsify/blob/main/docs/use-cases-guide.md) -- [Sparsify Models Guide](https://github.com/neuralmagic/sparsify/blob/main/docs/models-guide.md) - [Sparsify Datasets Guide](https://github.com/neuralmagic/sparsify/blob/main/docs/datasets-guide.md) +- [Sparsify Models Guide](https://github.com/neuralmagic/sparsify/blob/main/docs/models-guide.md) +- [One-Shot Experiments Guide](https://github.com/neuralmagic/sparsify/blob/main/docs/one-shot-experiment-guide.md) +- [Sparse-Transfer Experiments Guide](https://github.com/neuralmagic/sparsify/blob/main/docs/sparse-transfer-experiment-guide.md) +- [Training-Aware Experiments Guide](https://github.com/neuralmagic/sparsify/blob/main/docs/training-aware-experiment-guide.md) ## Resources @@ -364,7 +367,7 @@ Now that you have explored Sparsify [Alpha], here are other related resources. ### Feedback and Support -Report UI issues and CLI errors, submit bug reports, and provide general feedback about the product to the team via the [nm-sparsify Slack Channel](https://join.slack.com/t/discuss-neuralmagic/shared_invite/zt-1xkdlzwv9-2rvS6yQcCs7VDNUcWxctnw), or via [GitHub Issues](https://github.com/neuralmagic/sparsify/issues). Alpha support is provided through those channels. +Report UI issues and CLI errors, submit bug reports, and provide general feedback about the product to the Sparsify team via the [nm-sparsify Slack Channel](https://join.slack.com/t/discuss-neuralmagic/shared_invite/zt-1xkdlzwv9-2rvS6yQcCs7VDNUcWxctnw), or via [GitHub Issues](https://github.com/neuralmagic/sparsify/issues). Alpha support is provided through those channels. ### Terms and Conditions @@ -380,7 +383,7 @@ Thank you in advance for your feedback and interest! 
### Learning More -- Documentation: [SparseML,](https://docs.neuralmagic.com/sparseml/) [SparseZoo,](https://docs.neuralmagic.com/sparsezoo/) [Sparsify (1st Generation),](https://docs.neuralmagic.com/sparsify/) [DeepSparse](https://docs.neuralmagic.com/deepsparse/) +- Documentation: [SparseML](https://docs.neuralmagic.com/sparseml/), [SparseZoo](https://docs.neuralmagic.com/sparsezoo/), [Sparsify](https://docs.neuralmagic.com/sparsify/), [DeepSparse](https://docs.neuralmagic.com/deepsparse/) - Neural Magic: [Blog,](https://www.neuralmagic.com/blog/) [Resources](https://www.neuralmagic.com/resources/) ### Release History diff --git a/docs/datasets-guide.md b/docs/datasets-guide.md index a1f1f466..b95d0b9d 100644 --- a/docs/datasets-guide.md +++ b/docs/datasets-guide.md @@ -20,171 +20,245 @@ For all Sparsify Experiments, you will need to provide a dataset to create a spa Due to the varied ML pipelines and implementations, Sparsify standardizes on a few popular formats for datasets. You will need to make sure that your data is formatted properly according to the standards listed below. -## Predefined Use Cases +## Table of Contents -### Training-Aware and Sparse-Transfer +1. [Image Classification](#image-classification) +2. [Object Detection](#object-detection) +3. [Image Segmentation](#image-segmentation) +4. [NLP](#nlp) +5. [NPZ](#npz) +6. [Custom](#custom) -Training-Aware and Sparse-Transfer utilize specific dataset standards depending on the use case. -Each one is listed below with an example. +## Image Classification -#### Image Classification +For image classification tasks, Sparsify relies on the standard `SPLIT/CLASS/IMAGE` format used by the PyTorch ImageFolder class. -For image classification tasks, Sparsify relies on the dataset format standard used by the PyTorch ImageFolder class. -This format is fairly simple and intuitive, and it is also widely used in the machine learning community. 
- -##### Specifications - -- The root folder should contain subdirectories, each representing a single class of images. +### Specifications +- The root folder should contain `train` and `val` subdirectories, each representing the training and validation splits of the dataset. +- Each split should contain subdirectories, each representing a single class of images. - Images of a particular class/category should be placed inside the corresponding subdirectory. - The subdirectory name is used as the class label and should be unique for each class. - The images should be in a format readable by the Python Imaging Library (PIL), which includes formats such as .jpeg, .png, .bmp, etc. - Images do not need to be of the same size. -The PyTorch ImageFolder class automatically assigns numerical class labels to the images based on the lexicographical order of their class directories. -Therefore, it is crucial to ensure the directories are properly named to avoid any confusion or mislabeling. - -##### Example - -For an image classification task involving dogs and cats, the dataset directory should be structured as follows: +The root directory containing the splits data samples should be passed to the CLI as the `--data` argument. +### Structure +```text +data +├── train +│ ├── class_1 +│ │ ├── image_1.png +│ │ ├── image_2.png +│ │ └── ... +│ ├── class_2 +│ │ ├── image_1.png +│ │ ├── image_2.png +│ │ └── ... +│ └── ... +└── val + ├── class_1 + │ ├── image_1.png + │ ├── image_2.png + │ └── ... + ├── class_2 + │ ├── image_1.png + │ ├── image_2.png + │ └── ... + └── ... ``` -root/dog/xxx.png -root/dog/xxy.png -root/dog/xxz.png -root/cat/123.png -root/cat/nsa.png -root/cat/asd.png -``` +For more details and examples on creating image classification datasets for Sparsify, read the [Sparsify Datasets Guide](https://github.com/neuralmagic/sparsify/blob/main/docs/datasets-guide.md). 
-In this example, all images within the 'dog' subdirectory will be labeled as 'dog', and all images within the 'cat' subdirectory will be labeled as 'cat'. -The exact filenames ('xxx.png', 'xxy.png', etc.) do not matter; what matters is the directory structure and the directory names. +### Example -By organizing the data in this way, it can be easily read and labeled by the PyTorch ImageFolder class, and thus easily used for training image classification models in Sparsify. -Note, the class labels ('dog', 'cat') are case-sensitive and the order of the classes would be sorted lexicographically. -Here, 'cat' will be considered class 0, and 'dog' will be class 1, due to alphabetical order. -#### Object Detection / Image Segmentation +## Object Detection -For object detection and image segmentation tasks, Sparsify supports the dataset format used by YOLOv5. -This format is specifically designed for tasks involving bounding boxes and segmentation masks and is widely adopted in the community. +For object detection tasks, Sparsify utilizes the YOLO format for datasets. +This is the same format used by Ultralytics [YOLOv5/YOLOv8](https://docs.ultralytics.com/datasets/detect/) +The format is made up of a YAML file containing the root dataset location, the classes, and the training and validation split locations. -##### Specifications +If a directory is supplied instead and there is no YAML file within the directory, Sparsify will automatically create one for you. +To auto create a YAML file, the directory structure must be the same as listed below in addition to containing a classes.txt file which contains the class names with one per line. -- Images should be stored in a common directory, generally named `images`. -- Annotations for the images should be stored in a separate directory, often named `labels`. -- Images can be in formats readable by OpenCV (e.g. .jpg, .png). -- Each image should have a corresponding annotation file. 
The annotation files should be in plain text format (.txt). -- The name of the annotation file should be the same as the corresponding image file, except with a .txt extension. -- Annotation files for object detection should contain one line for each object in the image. Each line should be in the format: ` `, where the values are normalized relative to the size of the image. -- Annotation files for image segmentation should contain information about the segmentation masks. +### Specifications +- The root folder should contain `labels` and `images` subdirectories. +- Underneath both the `labels` and `images` directories, there should be `train` and `val` subdirectories, each representing the training and validation splits of the dataset. +- The split directories under `labels` should contain the YOLO format label files with a single `.txt` file per image. +- The text files underneath the `labels` directories should contain a single line per object of the format `class_index x_center y_center width height` where the coordinates are normalized between 0 and 1 and the class numbers are zero-indexed. +- The split directories under `images` should contain the images of any size in a format readable by the Python Imaging Library (PIL), which includes formats such as .jpeg, .png, .bmp, etc. +- Each image file must have a corresponding label file with the same name in the `labels` directory. +- If supplying a directory without a YAML file, the directory must also contain a `classes.txt` file with one class name per line in the same order as the class numbers in the label files. -##### Example +### Structure +```text +data +├── images +│ ├── train +│ │ ├── image_1.png +│ │ ├── image_2.png +│ │ └── ... +│ ├── val +│ │ ├── image_1.png +│ │ ├── image_2.png +│ │ └── ... +│ └── ... +├── labels +│ ├── train +│ │ ├── image_1.txt +│ │ ├── image_2.txt +│ │ └── ... +│ ├── val +│ │ ├── image_1.txt +│ │ ├── image_2.txt +│ │ └── ... +│ └── ... 
+├── classes.txt +└── dataset.yaml +``` -For an object detection task involving detecting cars and pedestrians, the dataset directory should be structured as follows: +For more details and examples on creating object detection datasets for Sparsify, read the [Sparsify Datasets Guide](https://github.com/neuralmagic/sparsify/blob/main/docs/datasets-guide.md). -``` -dataset/ -├── images/ -│ ├── image1.jpg -│ └── image2.jpg -└── labels/ - ├── image1.txt - └── image2.txt -``` +### Example -For `image1.jpg`, if there's a car and a pedestrian in the image, the corresponding `image1.txt` file could look like this: -``` -0 0.5 0.6 0.2 0.3 -1 0.7 0.8 0.1 0.2 -``` +## Image Segmentation -This would mean that there is an object of class 0 (car) centered at (50% of image width, 60% of image height) and having a width of 20% of the image width and height 30% of the image height. -The second line is similar but for an object of class 1 (pedestrian). +For image segmentation tasks, Sparsify utilizes the YOLO format for datasets. +This is the same format used by Ultralytics [YOLOv5/YOLOv8](https://docs.ultralytics.com/datasets/segment/) +The format is made up of a YAML file containing the root dataset location, the classes, and the training and validation split locations. -For image segmentation, the labels might be more complex, including segmentation masks that indicate which pixels belong to which object category. +If a directory is supplied instead and there is no YAML file within the directory, Sparsify will automatically create one for you. +To auto create a YAML file, the directory structure must be the same as listed below in addition to containing a classes.txt file which contains the class names with one per line. -Make sure the class labels are consistent with what is expected by the YOLOv5 configuration you are using, and that the bounding box coordinates are normalized as described above. 
+### Specifications +- The root folder should contain `annotations` and `images` subdirectories. +- Underneath both the `annotations` and `images` directories, there should be `train` and `val` subdirectories, each representing the training and validation splits of the dataset. +- The split directories under `annotations` should contain the YOLO format annotation files with a single `.txt` file per image. +- The text files underneath the `annotations` directories should contain a single line per object of the format `class_index x_1 y_1 x_2 y_2 x_3 y_3` where the coordinates that bound the object are normalized between 0 and 1 and the class numbers are zero-indexed. +- The split directories under `images` should contain the images of any size in a format readable by the Python Imaging Library (PIL), which includes formats such as .jpeg, .png, .bmp, etc. +- Each image file must have a corresponding annotation file with the same name in the `annotations` directory. +- If supplying a directory without a YAML file, the directory must also contain a `classes.txt` file with one class name per line in the same order as the class numbers in the annotation files. -#### Natural Language (NLP/NLG) +### Structure +```text +data +├── images +│ ├── train +│ │ ├── image_1.png +│ │ ├── image_2.png +│ │ └── ... +│ ├── val +│ │ ├── image_1.png +│ │ ├── image_2.png +│ │ └── ... +│ └── ... +├── annotations +│ ├── train +│ │ ├── image_1.txt +│ │ ├── image_2.txt +│ │ └── ... +│ ├── val +│ │ ├── image_1.txt +│ │ ├── image_2.txt +│ │ └── ... +│ └── ... +├── classes.txt +└── dataset.yaml +``` -For natural language processing (NLP) and natural language generation (NLG) tasks, Sparsify supports the dataset formats used by the Hugging Face library. -Hugging Face datasets can be represented in various file formats including JSON, CSV, and JSON lines format (.json). 
+For more details and examples on creating segmentation datasets for Sparsify, read the [Sparsify Datasets Guide](https://github.com/neuralmagic/sparsify/blob/main/docs/datasets-guide.md). -##### Specifications ### Example -- Each row or line in your data file should represent a single example. -- The data must include the features necessary for your task. For example, a dataset for text classification might include 'text' and 'label' fields. -- For JSON files, each line should be a separate, self-contained JSON object. -- For CSV files, the first row should include the column names, and each subsequent row should include the fields for a single example. -- The file should be UTF-8 encoded to support a wide range of text inputs. -##### Example +## NLP -Here is an example of how you might structure a dataset for a sentiment analysis task: +For NLP tasks, Sparsify utilizes the Hugging Face [Datasets](https://huggingface.co/docs/datasets/) format and expectations. +Hugging Face datasets can be represented in various file formats, including CSV, and JSON lines format (.jsonl). -If you're using a JSON lines (.json) format, your file could look like this: +Specifications: +- The root folder should contain JSON or CSV files associated with each split of the dataset. +- The JSON or CSV files must be named such that the training data contains the word `train`, validation data contains the word `val`, and any optional test data contains the word `test`. +- For JSON files, each line must be a JSON object representing a single data sample. +- For CSV files, the first row must be a header row containing the column names. +- The label column must be named `label`. +- The features column will be dynamically determined based on the column names and the rules below: 
+ - Otherwise the first non label columns will be used for the features with sentence1 being set to the first column and setence2 being set to the second if present. +- The files should be UTF-8 encoded. +### Structure + +#### JSON +```text +data +├── train.json +├── val.json +└── test.json ``` + +Where the contents of each JSON file would look like the following: +```text {"text": "I love this movie!", "label": "positive"} {"text": "This movie was awful.", "label": "negative"} {"text": "I have mixed feelings about this film.", "label": "neutral"} ``` -Each line is a separate JSON object, representing a single example. - -If you're using a CSV format, your file could look like this: - +#### CSV +```text +data +├── train.csv +├── val.csv +└── test.csv ``` + +Where the contents of each CSV file would look like the following: +```text text,label "I love this movie!","positive" "This movie was awful.","negative" "I have mixed feelings about this film.","neutral" ``` -The first row contains the column names, and each subsequent row represents a single example. +### Example -Whether you choose to use JSON lines or CSV will depend on your specific needs and preferences, but either format will work well with Hugging Face and Sparsify. -Make sure your data is formatted correctly according to these specifications to ensure it can be used in your experiments. -### One-Shot -For One-Shot Experiments, Sparsify utilizes the `.npz` format for data storage, which is a file format based on the popular NumPy library. -This format is efficient and versatile. -In the near future, more functionality will be landed such that the definitions given above for Training-Aware and Sparse-Transfer will work as well. +## NPZ -#### Specifications +For One-Shot Experiments, Sparsify utilizes the `.npz` format for data storage, which is a file format based on the popular NumPy library. +In the future, more formats will be added for support with One-Shot Experiments. 
-- Each `.npz` file should contain a single data sample, with no batch dimension. This data sample will be run through the ONNX model. +### Specifications +- Each `.npz` file should contain a single data sample, with no batch dimension. + This data sample will be run through the ONNX model. - The `.npz` file should be structured as a dictionary, mapping the input name in the ONNX specification to a numpy array containing the data. -- All data samples should be stored under the same directory, typically named `data`. +- All data samples should be stored under the same directory, typically named `data`. -The local file structure should look like the following: +The root directory containing the data samples should be passed to the CLI as the `--data` argument. +### Structure ```text data - -- input1.npz - -- input2.npz - -- input3.npz +├── input1.npz +├── input2.npz +├── input3.npz ``` -#### Example - -For example, if you have a BERT-style model with a sequence length of 128, each `.npz` file should contain a dictionary mapping input names ("input_ids", "attention_mask", "token_type_ids") to numpy arrays of the appropriate size: - +Where each `input#.npz` file contains a single data sample, and the data sample is structured as a dictionary mapping the input name in the ONNX specification to a numpy array containing the data that matches the input shapes without the batch dimension. +For example, a BERT-style model running with a sequence length of 128 would have the following data sample: ```text { - "input_ids": ndarray(128,), - "attention_mask": ndarray(128,), - "token_type_ids": ndarray(128,) + "input_ids": ndarray(128,), + "attention_mask": ndarray(128,), + "token_type_ids": ndarray(128,) } ``` -The dictionary keys should match the names of the inputs in the ONNX model specification, and the shapes of the arrays should match the expected input shapes of the model. 
- -#### Generating NPZ Files +### Example Below is an example script for generating this file structure from a PyTorch module before the ONNX export: @@ -240,7 +314,7 @@ model.save() Note: Replace `YOUR_MODEL` and `YOUR_DATA_LOADER` with your PyTorch model and data loader, respectively. -## Custom Use Cases +## Custom Currently, custom use cases are not supported for dataset representation and datasets must conform to the definitions above. In the near future, these will be supported through plugin specifications. diff --git a/docs/models-guide.md b/docs/models-guide.md index 1f8b9aff..6610ca4c 100644 --- a/docs/models-guide.md +++ b/docs/models-guide.md @@ -16,19 +16,80 @@ limitations under the License. # Sparsify Models Guide -For any Sparsify Experiments, a dense model can be supplied for sparsification. -One-Shot is the only experiment type that requires a model to be passed in. -For others, a default model will be chosen to best fit the given use case. +For most Sparsify Experiments, you will need to provide a base model to create a sparse model from. Due to the varied ML pipelines and implementations, Sparsify standardizes on a few popular formats for models. -You will need to make sure that your models are formatted properly according to the standards listed below. +You will need to make sure that your model is formatted properly according to the standards listed below. -## One-Shot +## Table of Contents -The ONNX model format is the only currently supported format for One-Shot. -See the [SparseML documentation](https://docs.neuralmagic.com) for exporting to ONNX formats. -In the near future, more formats will be added for support with One-Shot. +1. [Image Classification](#image-classification) +2. [Object Detection](#object-detection) +3. [Image Segmentation](#image-segmentation) +4. [NLP](#nlp) +5. [ONNX](#onnx) +6. 
[Custom](#custom) -## Training-Aware and Sparse-Transfer +## Image Classification -The PyTorch model format is the only currently supported format for Training-Aware and Sparse-Transfer Experiments. -The exact format will depend on the pipeline, and therefore the use case, for the experiment. +For image classification tasks, Sparsify relies on the PTH format generated from SparseML. +Specifically, the PTH format generated from the `ModuleExporter` class in SparseML. +This will save a model in the PTH format with the following structure: + +### Structure +```text +{ + "state_dict": model.state_dict(), + "optimizer": optimizer.state_dict(), + "recipe": recipe, + "epoch": epoch, + "arch_key": arch_key, +} +``` + +### Example +```python +from sparseml.pytorch.image_classification.utils import ModuleExporter +from torchvision.models import resnet18 + +model = resnet18() +exporter = ModuleExporter(model, "./") +exporter.export_pytorch( + optimizer=None, + epoch=-1, + recipe=None, + name=f"{model}.pth", + arch_key="resnet18", +) +``` + +## Object Detection + +For object detection tasks, Sparsify utilizes the YOLO format for models. +This is the same format used by Ultralytics [YOLOv5/YOLOv8](https://docs.ultralytics.com/) +This is the default format that is saved from training within the YOLOv5 or YOLOv8 repos. + +More information on the YOLO format can be found [here](https://docs.ultralytics.com/tasks/detect/#models). + +## Image Segmentation + +For image segmentation tasks, Sparsify utilizes the YOLO format for models. +This is the same format used by Ultralytics [YOLOv5/YOLOv8](https://docs.ultralytics.com/) +This is the default format that is saved from training within the YOLOv5 or YOLOv8 repos. + +More information on the YOLO format can be found [here](https://docs.ultralytics.com/tasks/segment/#models). + +## NLP + +For NLP tasks, Sparsify utilizes the HuggingFace Models format and expectations. 
+This includes the standard tokenizer.json, config.json, and bin files.
+If using any of the standard transformers pathways externally or through SparseML, then this is the default format models are saved in.
+
+More information on the HuggingFace Models format can be found [here](https://huggingface.co/transformers/model_sharing.html).
+
+## ONNX
+
+For One-Shot Experiments, Sparsify utilizes the `.onnx` format for models.
+In the future, more formats will be added for support with One-Shot Experiments.
+
+For more information on the ONNX format, see the [ONNX website](https://onnx.ai/).
+For more information on exporting to the ONNX format, see our docs page [here](https://docs.neuralmagic.com/user-guides/onnx-export).
diff --git a/docs/one-shot-experiment-guide.md b/docs/one-shot-experiment-guide.md
new file mode 100644
index 00000000..49b0159b
--- /dev/null
+++ b/docs/one-shot-experiment-guide.md
@@ -0,0 +1,146 @@
+
+
+# Sparsify One-Shot Experiment Guide
+
+If you're just getting started with Sparsify, we recommend you try out this One-Shot Experiment pathway first.
+We also have Sparse-Transfer and Training-Aware Experiments, which you can explore in the [Next Steps](#next-steps) section of this guide.
+
+## Table of Contents
+
+1. [Experiment Overview](#experiment-overview)
+2. [CLI Quickstart](#cli-quickstart)
+3. [Examples](#examples)
+4. [Next Steps](#next-steps)
+5. [Resources](#resources)
+
+
+## Experiment Overview
+
+| Sparsity | Sparsification Speed | Accuracy |
+|----------|----------------------|----------|
+| **++** | **+++++** | **+++** |
+
+One-Shot Experiments are the quickest way to create a faster and smaller version of your model.
+The algorithms are applied to the model post-training, utilizing a calibration dataset, so they result in no further training time and much faster sparsification times compared with Training-Aware Experiments.
+
+Generally, One-Shot Experiments result in a 3-5x speedup with minimal accuracy loss.
+They are ideal for when you want to quickly sparsify your model and have limited time to spend on the sparsification process. + +The CLI Quickstart below will walk you through the steps to run a One-Shot Experiment on your model. +To utilize the cloud pathways for One-Shot Experiments, review the [Cloud User Guide](./cloud-user-guide.md). + +## CLI Quickstart + +Now that you understand what a One-Shot Experiment is and the benefits, including short optimization time due to post-training algorithms, you can now use the CLI to effectively run a One-Shot Experiment. + +Before you run a One-Shot Experiment, confirm you are logged into the Sparsify CLI. +For installation and setup instructions, review the [Install and Setup Section](../README.md#1-install-and-setup) in the Sparsify README. + +One-Shot Experiments use the following general command: + +```bash +sparsify.run one-shot --use-case USE_CASE --model MODEL --data DATA --optim-level OPTIM_LEVEL* +``` + +* optional arguments + +The description, rules, and possible values for each of the arguments are described below: +- [USE_CASE](#use_case) +- [MODEL](#model) +- [DATA](#data) +- [OPTIM_LEVEL](#optim_level) (Optional) + +### USE_CASE + +The generally supported use cases for Sparsify are: +- `cv-classification` +- `cv-detection` +- `cv-segmentation` +- `nlp-question_answering` +- `nlp-text_classification` +- `nlp-sentiment_analysis` +- `nlp-token_classification` +- `nlp-named_entity_recognition` + +Note that other aliases are recognized for these use cases, such as image-classification for cv-classification. +Sparsify will automatically recognize these aliases and apply the correct use case. + +For One-Shot Experiments, both the CLIs and APIs always support custom use cases. +To utilize, run a One-Shot Experiment with `--use-case` set to the desired custom use case. +This custom use case can be any ASCII string. + +### MODEL + +One-Shot requires the model provided to be in an [ONNX format](https://onnx.ai/). 
+The ONNX model must be exported with static input shapes and not contain custom ONNX operators. +For guidance on how to convert a PyTorch model to ONNX, read our [ONNX Export User Guide](https://docs.neuralmagic.com/user-guides/onnx-export). + +In the near future, more formats including PyTorch will be added for support with One-Shot Experiments. + +### DATA + +For One-Shot Experiments, Sparsify utilizes the `.npz` format for data storage, which is a file format based on the popular NumPy library. +In the future, more formats will be added for support with One-Shot Experiments. + +Specifically, the following structure is expected for the dataset: +```text +data +├── input1.npz +├── input2.npz +├── input3.npz +``` + +Where each `input#.npz` file contains a single data sample, and the data sample is structured as a dictionary mapping the input name in the ONNX specification to a numpy array containing the data that matches the input shapes without the batch dimension. +For example, a BERT-style model running with a sequence length of 128 would have the following data sample: +```text +{ + "input_ids": ndarray(128,), + "attention_mask": ndarray(128,), + "token_type_ids": ndarray(128,) +} +``` + +For more information on the specs and guides for creating the NPZ format, read the [NPZ Dataset Guide](./datasets-guide.md#npz). + +#### OPTIM_LEVEL + +When using Sparsify, the optim (sparsification) level is one of the top arguments you should decide on. +Specifically, it controls how much sparsification is applied to your model, with higher values resulting in faster and more compressed models. +At the max range, though, you may see a drop in accuracy. + +Given that One-Shot is applied in post-training, the sparsity ranges are lowered to avoid accuracy drops as compared with Sparse-Transfer or Training-Aware. +The current ranges are the following (subject to change): +- optim-level == 0.0: no sparsification is applied and the input model is returned as a baseline test case. 
+- optim-level < 0.3: INT8 quantization of the model (activations and weights) is applied. +- optim-level >= 0.3: unstructured pruning (sparsity) is applied to the weights of the model from 40% for 0.3 to 80% for 1.0 with linear scaling between. + Additionally, INT8 quantization of the model is applied. + +The default of 0.5 will result in a ~50% sparse model with INT8 quantization. + +## Examples + +Check back in soon for walkthroughs and examples of One-Shot Experiments applied to various popular models and use cases. + +### Next Steps + +Now that you have successfully run a One-Shot Experiment, check out the following guides to continue your Sparsify journey: +- [Sparse Transfer Experiment Guide](./sparse-transfer-experiment-guide.md) +- [Training Aware Experiment Guide](./training-aware-experiment-guide.md) + +### Resources + +To learn more about Sparsify and the available pathways other than One-Shot Experiments, refer to the [Sparsify README](../README.md). diff --git a/docs/one-shot_experiment-guide.md b/docs/one-shot_experiment-guide.md deleted file mode 100644 index 9a60a022..00000000 --- a/docs/one-shot_experiment-guide.md +++ /dev/null @@ -1,251 +0,0 @@ - - - - -# Sparsify One-Shot Experiment Guide - -If you're just getting started with Sparsify, we recommend you try out this One-Shot Experiment pathway first. We also have Sparse-Transfer and Training-Aware Experiments, which you can explore in the [Next Steps](#next-steps) section of this guide. - -## Overview -1. One-Shot Experiment Overview -2. One-Shot CLI Quickstart -3. One-Shot Cloud Quickstart -4. Next Steps -5. Resources - - -### One-Shot Experiment Overview - -| Sparsity | Sparsification Speed | Accuracy | -|----------|----------------------|----------| -| **++** | **+++++** | **+++** | - -One-Shot Experiments are the quickest way to create a faster and smaller version of your model. 
-The algorithms are applied to the model post-training, utilizing a calibration dataset, so they result in no further training time and much faster sparsification times compared with Training-Aware Experiments. - -Generally, One-Shot Experiments result in a 3-5x speedup with minimal accuracy loss. -They are ideal for when you want to quickly sparsify your model and have limited time to spend on the sparsification process. - - -### One-Shot CLI Quickstart - -Now that you understand what a One-Shot Experiment is and the benefits, including short optimization time due to post-training algorithms, you can now use the CLI to effectively run a One-Shot Experiment. - -Before you run a One-Shot Experiment, confirm you are logged into the Sparsify CLI. For installation and setup instructions, review the [Sparsify Install and Setup Section](README section.com) in the Sparsify README. - -One-Shot Experiments use the following general command: - -```bash -sparsify.run one-shot --use-case USE_CASE --model MODEL --data DATA --optim-level OPTIM_LEVEL -``` - -The values for each of the arguments follow these general rules: -- [**`USE_CASE`** ](#use_case) -- [**`MODEL`**](#model) -- [**`DATA`**](#data) -- [**`OPTIM_LEVEL`**](#optim_level) - -#### USE_CASE - -The generally supported use cases for Sparsify are: - -- CV - classification: `cv-classification` -- CV - detection: `cv-detection` -- CV - segmentation: `cv-segmentation` -- NLP - question answering: `nlp-question_answering` -- NLP - text classification: `nlp-text_classification` -- NLP - sentiment analysis: `nlp-sentiment_analysis` -- NLP - token classification: `nlp-token_classification` -- NLP - named entity recognition: `nlp-named_entity_recognition` - -Note that other aliases are recognized for these use cases, such as image-classification for cv-classification. Sparsify will automatically recognize these aliases and apply the correct use case. 
- -For One-Shot Experiments, both the CLIs and APIs always support custom use cases. To utilize, run a One-Shot Experiment with `--use-case` set to the desired custom use case. This custom use case can be any string as long as it does not contain ASCII characters. - -For full details on Sparsify use cases, read the [Sparsify Use Cases Guide](https://github.com/neuralmagic/sparsify/blob/main/docs/use-cases-guide.md). - -#### MODEL - -One-Shot requires the model provided to be in an [ONNX format](https://onnx.ai/). For guidance on how to convert a PyTorch model to ONNX, read our [ONNX Export User Guide](https://docs.neuralmagic.com/user-guides/onnx-export). - -In the near future, more formats including PyTorch will be added for support with One-Shot Experiments. - -#### DATA - -For One-Shot Experiments, Sparsify utilizes the `.npz` format for data storage, which is a file format based on the popular NumPy library. This format is efficient and versatile. - -##### Dataset Specifications - -- Each `.npz` file should contain a single data sample, with no batch dimension. This data sample will be run through the ONNX model. -- The `.npz` file should be structured as a dictionary, mapping the input name in the ONNX specification to a numpy array containing the data. -- All data samples should be stored under the same directory, typically named `data`. 
- -The local file structure should look like the following: - -```text -data - -- input1.npz - -- input2.npz - -- input3.npz -``` - -##### Example - -For example, if you have a BERT-style model with a sequence length of 128, each `.npz` file should contain a dictionary mapping input names ("input_ids", "attention_mask", "token_type_ids") to numpy arrays of the appropriate size: - -```text -{ - "input_ids": ndarray(128,), - "attention_mask": ndarray(128,), - "token_type_ids": ndarray(128,) -} -``` - -The dictionary keys should match the names of the inputs in the ONNX model specification, and the shapes of the arrays should match the expected input shapes of the model. - -##### Generating NPZ Files - -Below is an example script for generating this file structure from a PyTorch module **before the ONNX export**: - -```python -import numpy as np -import torch -from torch import Tensor - -class NumpyExportWrapper(torch.nn.Module): - def __init__(self, model): - super(NumpyExportWrapper, self).__init__() - self.model = model - self.model.eval() # Set model to evaluation mode - self.numpy_data = [] - - def forward(self, *args, **kwargs): - with torch.no_grad(): - inputs = {} - batch_size = 0 - - for index, arg in enumerate(args): - if isinstance(arg, Tensor): - inputs[f"input_{index}"] = arg - batch_size = arg.size[0] - - for key, val in kwargs.items(): - if isinstance(val, Tensor): - inputs[key] = val - batch_size = val.shape[0] - - start_index = len(self.numpy_data) - for _ in range(batch_size): - self.numpy_data.append({}) - - for input_key in iter(inputs): - for idx, input in enumerate(inputs[input_key]): - self.numpy_data[start_index+idx][input_key] = input - - return self.model(*args, **kwargs) - - def save(self, path: str = "data"): - for index, item in enumerate(self.numpy_data): - npz_file_path = f'{path}/input{str(index).zfill(4)}.npz' - np.savez(npz_file_path, **item) - - print(f'Saved {len(self.numpy_data)} npz files to {path}') - -model = 
NumpyExportWrapper(YOUR_MODEL) -for data in YOUR_DATA_LOADER: - model(data[0]) -model.save() -``` - -Note: Replace `YOUR_MODEL` and `YOUR_DATA_LOADER` with your PyTorch model and data loader, respectively. - -For full details on Sparsify datasets, read the [Sparsify Datasets Guide](https://github.com/neuralmagic/sparsify/blob/main/docs/datasets-guide.md#sparsify-datasets-guide). - -#### OPTIM_LEVEL - -When using Sparsify, the optim (sparsification) level is one of the top arguments you should decide on. Specifically, it controls how much sparsification is applied to your model, with higher values resulting in faster and more compressed models. At the max range, though, you may see a drop in accuracy. - -The optim level can be set anywhere from 0.0 to 1.0, where 0.0 is for no sparsification and 1.0 is for maximum sparsification. -0.5 is the default optim level and is a good starting point for most use cases. - -##### One-Shot Optim Levels - -Given that One-Shot is applied in post-training, the sparsity ranges are lowered to avoid accuracy drops as compared with Sparse-Transfer or Training-Aware. -The specific ranges are the following: - -- optim-level == 0.0: no sparsification is applied and the input model is returned as a baseline test case. -- optim-level < 0.3: INT8 quantization of the model (activations and weights) is applied. -- optim-level >= 0.3: unstructured pruning (sparsity) is applied to the weights of the model from 40% for 0.3 to 80% for 1.0 with linear scaling between. - Additionally, INT8 quantization of the model is applied. - -The default of 0.5 will result in a ~50% sparse model with INT8 quantization. - - -### Example One-Shot Experiment CLI Commands - -Here are code examples of One-Shot Experiments you may wish to run; pick your use case and start sparsifying with One-Shot! 
- -#### Running One-Shot Experiments - -##### Computer Vision Use Case: - -You have an image classification use case and want to run a One-Shot Experiment on a dense ResNet-50 model using the imagenette dataset. You want to quickly and cheaply generate a sparse model so that you can build a prototype of the ResNet-50 model inferencing on a CPU server in the cloud with DeepSparse. Getting a working model that meets your deployment requirements on the imagenette dataset will give you the confidence to continue on your initiative knowing you can hit the metrics required for the business. - -You are targeting a balanced model in terms of wanting to get a 3-5x performance boost in latency while also maintaining the high accuracy of the model so that you can confidently deploy the model in production to solve your business case. - -You can use a Sparsify One-Shot Experiment to try and reach your goal. You have a standard ResNet-50 model as your dense baseline on imagenette which Sparsify already has as an alias model and npz formatted dataset hosted for you to use out of the box. Since you want to very quickly achieve a 3-5x speedup in latency performance with minimal training costs, a One-Shot Experiment makes the most sense for you for its fast optimization and lower, moderately performant sparsity profile. - -With all of these considerations in mind, run the following One-Shot Experiment command to achieve this use case goal: -```bash -sparsify.run one-shot --use-case image_classification --model resnet50 --data imagenette --optim-level 0.5 -``` -The output is as follows: - -MARK - -##### NLP Use Case: -You are working on a text classification use case to help classify text reviews received from your customers through your e-commerce website. You have been having slow inference times using the BERT-base model and want to improve the performance to save costs. 
- -You want to quickly and cheaply generate a sparse BERT-base model so that you can use it to classify our customer reviews at a lower cost due to the improved performance and speed of the model. You are focused on improving the throughput of the model to process more requests, faster. - -You are targeting a balanced model in terms of wanting to get a 3-5x performance boost in throughput while having a high accuracy so your classifications are actionable. - -You can use a Sparsify One-Shot Experiment to try and reach your goal. You have a standard BERT-base model as our dense baseline on the SST2 dataset which Sparsify already has as an alias model and npz formatted dataset hosted for you to use out of the box. You want to try and reduce your costs by improving the throughput performance of your model and you are limited by our compute spend and team size. A One-Shot Experiment makes the most sense for you for its fast optimization and lower cost pathway as opposed to fully retraining the model to optimize it. - -With all of these considerations in mind, run the following One-Shot Experiment command to achieve your goal this use case goal: - -```bash -sparsify.run one-shot --use-case text_classification --model bert-base --data sst2 --optim-level 0.5 -``` -The output is as follows: -MARK - - -### One-Shot Cloud Quickstart - -In addition to manually creating commands, you can use the Sparsify Cloud to generate Sparsify One-Shot Experiment commands. - -To get started, read the [Sparsify Cloud User Guide](https://github.com/neuralmagic/sparsify/blob/main/docs/cloud-user-guide.md). - - -### Next Steps - -Now that you have successfully run a One-Shot Experiment, check out the [Sparse-Transfer](LINK.com) and [Training-Aware](LINK.com) Experiments to target different sparsity profiles. - - -### Resources -To learn more about Sparsify and all of the available pathways outside of One-Shot Experiments, refer to the [Sparsify README](https://github.com/neuralmagic/sparsify). 
diff --git a/docs/sparse-transfer-experiment-guide.md b/docs/sparse-transfer-experiment-guide.md index 830fb61b..ba108b19 100644 --- a/docs/sparse-transfer-experiment-guide.md +++ b/docs/sparse-transfer-experiment-guide.md @@ -1,6 +1,3 @@ - - - + +# Sparsify Training-Aware Experiment Guide + +The Sparsify Training-Aware Experiment Guide is a guide for running Training-Aware Experiments with the Sparsify CLI. +We also have One-Shot and Sparse-Transfer Experiments, which you can explore in the [Next Steps](#next-steps) section of this guide. + +## Table of Contents + +1. [Experiment Overview](#experiment-overview) +2. [CLI Quickstart](#cli-quickstart) +4. [Examples](#examples) +5. [Next Steps](#next-steps) +6. [Resources](#resources) + +## Experiment Overview + +| Sparsity | Sparsification Speed | Accuracy | +|-----------|-----------------------|-----------| +| **+++++** | **++** | **+++++** | + +Training-Aware Experiments are the most accurate way to create a faster and smaller model for your dataset. +The algorithms are applied to the model during training, so they offer the best possible recovery of accuracy. +However, they do require additional training time and hyperparameter tuning to achieve the best results. + +Generally, Training-Aware Experiments result in a 6–12x speedup with minimal accuracy loss. +They are ideal when you have the time to train a model, have a custom model, or want to achieve the best possible accuracy. + +The CLI Quickstart below will walk you through the steps to run a Training-Aware Experiment on your model. +To utilize the cloud pathways for Training-Aware Experiments, review the [Cloud User Guide](./cloud-user-guide.md). + +## CLI Quickstart + +Now that you understand what a Training-Aware Experiment is and the benefits, including the best possible recovery of accuracy for an optimized model, you're ready to use the CLI to effectively run a Training-Aware Experiment. 
+ +Before you run a Training-Aware Experiment, confirm you are logged in to the Sparsify CLI. +For instructions on Installation and Setup, review the [Sparsify Install and Setup Section](READMEsection.com) in the Sparsify README. + +Training-Aware Experiments use the following general command: + +```bash +sparsify.run training-aware --use-case USE_CASE --model MODEL --data DATA --optim-level OPTIM_LEVEL* +``` + +* optional arguments + +The values for each of the arguments follow these general rules: +- [USE_CASE](#use_case) +- [MODEL](#model) +- [DATA](#data) +- [OPTIM_LEVEL](#optim_level) (Optional) + +### USE_CASE + +The generally supported use cases for Sparsify are: +- `cv-classification` +- `cv-detection` +- `cv-segmentation` +- `nlp-question_answering` +- `nlp-text_classification` +- `nlp-sentiment_analysis` +- `nlp-token_classification` +- `nlp-named_entity_recognition` + +Note that other aliases are recognized for these use cases, such as image-classification for cv-classification. +Sparsify will automatically recognize these aliases and apply the correct use case. + +Currently, custom use cases are not supported for Training-Aware Experiments. + +#### MODEL + +Models are optional for the Sparse-Transfer pathway. +If no model is provided, a performance and accuracy balanced base model for the use case will be chosen. 
+ +If you choose to override the model, it is expected to be a pre-sparsified model and adhere to the following formats depending on the use case: +- `cv-classification`: SparseML PTH Format + - [Image Classification Models Guide](./models-guide#image-classification) +- `cv-detection` - YOLOv5/YOLOv8 Format + - [Object Detection Models Guide](./models-guide#object-detection) + - Example structure: data/classes.txt; data/images/{SPLIT}/{IMAGE.EXT}; data/labels/{SPLIT}/{IMAGE.EXT}) +- `cv-segmentation` - YOLOv5/YOLOv8 Format + - [Image Segmentation Models Guide](./models-guide#image-segmentation) +- `nlp-*`: Hugging Face Format + - [NLP Models Guide](./models-guide#nlp) + +Currently, custom use cases are not supported for model representation and models must conform to the definitions above. +In the near future, these will be supported through plugin specifications. + +For full details on Sparsify models, read the [Sparsify Models Guide](./models-guide.md). + +#### DATA + +For all Sparsify Experiments, you will need to provide a dataset to create a sparse model. +Due to the varied ML pipelines and implementations, Sparsify standardizes on a few popular formats for datasets. +Confirm that your data is formatted properly according to the standards listed below. + +Different use cases may require different input formats depending on what is considered standard for that use case. 
+Specifically, the following are the supported formats as well as links to specs and guides for creating datasets for each format:
+- `cv-classification`: Image Folder Format
+  - [Image Classification Dataset Guide](./datasets-guide#image-classification)
+  - Example structure: data/{SPLIT}/{CLASS}/{IMAGE.EXT}
+- `cv-detection` - YOLO Format
+  - [Object Detection Dataset Guide](./datasets-guide#object-detection)
+  - Example structure: data/classes.txt; data/images/{SPLIT}/{IMAGE.EXT}; data/labels/{SPLIT}/{IMAGE.EXT}
+- `cv-segmentation` - YOLO Format
+  - [Image Segmentation Dataset Guide](./datasets-guide#image-segmentation)
+  - Example structure: data/classes.txt; data/images/{SPLIT}/{IMAGE.EXT}; data/annotations/{SPLIT}/{IMAGE.EXT}
+- `nlp-*`: Hugging Face CSV or JSONL Format
+  - [NLP Dataset Guide](./datasets-guide#nlp)
+  - Example structure: data/{SPLIT}.csv or data/{SPLIT}.jsonl or data/{SPLIT}.json
+
+Currently, custom use cases are not supported for dataset representation and datasets must conform to the definitions above.
+In the near future, these will be supported through plugin specifications.
+
+For full details on Sparsify datasets, read the [Sparsify Datasets Guide](./datasets-guide.md).
+
+#### OPTIM_LEVEL
+
+When using Sparsify, the optim (sparsification) level is one of the top arguments you should decide on.
+Specifically, it controls how much sparsification is applied to your model, with higher values resulting in faster and more compressed models.
+At the max range, though, you may see a drop in accuracy.
+
+Given that Training-Aware is applied while training, the sparsity ranges are increased as compared to One-Shot since accuracy recovery is easier at higher sparsities.
+The specific ranges are the following:
+- optim-level == 0.0: no sparsification is applied and the input model is returned as a baseline test case.
+- optim-level < 0.3: INT8 quantization of the model (activations and weights) is applied.
+- optim-level >= 0.3: unstructured pruning (sparsity) is applied to the weights of the model from 60% for 0.3 to 95% for 1.0 with linear scaling between. + Additionally, INT8 quantization of the model is applied. + +The default of 0.5 will result in a ~70% sparse model with INT8 quantization, and is a good default to start with. + +## Examples + +Check back in soon for walkthroughs and examples of One-Shot Experiments applied to various popular models and use cases. + +### Next Steps + +Now that you have successfully run a Training-Aware Experiment, check out the following guides to continue your Sparsify journey: +- [One-Shot Experiment Guide](./one-shot-experiment-guide.md) +- [Sparse Transfer Experiment Guide](./sparse-transfer-experiment-guide.md) + +### Resources + +To learn more about Sparsify and the available pathways other than Training-Aware Experiments, refer to the [Sparsify README](../README.md). diff --git a/docs/training-aware_experiment-guide.md b/docs/training-aware_experiment-guide.md deleted file mode 100644 index 1225547a..00000000 --- a/docs/training-aware_experiment-guide.md +++ /dev/null @@ -1,302 +0,0 @@ - - - - - - -# Sparsify Training-Aware Experiment Guide - -## Overview -1. Training-Aware Experiment Overview -2. Training-Aware CLI Quickstart -3. Training-Aware Cloud Quickstart -4. Next Steps -5. Resources - - - -#### Training-Aware Experiments - -| Sparsity | Sparsification Speed | Accuracy | -|-----------|-----------------------|-----------| -| **+++++** | **++** | **+++++** | - -Training-Aware Experiments are the most accurate way to create a faster and smaller model for your dataset. -The algorithms are applied to the model during training, so they offer the best possible recovery of accuracy. -However, they do require additional training time and hyperparameter tuning to achieve the best results. - -Generally, Training-Aware Experiments result in a 6–12x speedup with minimal accuracy loss. 
They are ideal when you have the time to train a model, have a custom model, or want to achieve the best possible accuracy. - - -### Training-Aware CLI Quickstart - -Now that you understand what a Training-Aware Experiment is and the benefits, including the best possible recovery of accuracy for an optimized model, you can now use the CLI to effectively run a Training-Aware Experiment. - -Before you run a Training-Aware Experiment, confirm you are logged in to the Sparsify CLI. For instructions on Installation and Setup, review the [Sparsify Install and Setup Section](READMEsection.com) in the Sparsify README. - -Training-Aware Experiments use the following general command: - -```bash -sparsify.run training-aware --use-case USE_CASE --model MODEL --data DATA --optim-level OPTIM_LEVEL -``` - -The values for each of the arguments follow these general rules: -- [**`USE_CASE`** ](#use_case) -- [**`MODEL`**](#model) -- [**`DATA`**](#data) -- [**`OPTIM_LEVEL`**](#optim_level) - -#### USE_CASE - -The generally supported use cases for Sparsify are: - -- CV - classification: `cv-classification` -- CV - detection: `cv-detection` -- CV - segmentation: `cv-segmentation` -- NLP - question answering: `nlp-question_answering` -- NLP - text classification: `nlp-text_classification` -- NLP - sentiment analysis: `nlp-sentiment_analysis` -- NLP - token classification: `nlp-token_classification` -- NLP - named entity recognition: `nlp-named_entity_recognition` - -Note that other aliases are recognized for these use cases, such as image-classification for cv-classification. Sparsify will automatically recognize these aliases and apply the correct use case. - -For Training-Aware Experiments, custom use cases are only supported with the APIs for custom integrations. This is because non-custom integrations utilize plugins that correspond to the appropriate use case for training pipelines. 
To utilize this, ensure that you have a training pipeline ready to go and inject the Sparsify API into the training pipeline with the desired use case passed in as an argument. More information on this specific pathway will be available in the near future as Sparsify's development progresses. - -For full details on Sparsify use cases, read the [Sparsify Use Cases Guide](https://github.com/neuralmagic/sparsify/blob/main/docs/use-cases-guide.md). - -#### MODEL - - -The PyTorch model format is the supported model format for Training-Aware Experiments. The exact format will depend on the pipeline, and therefore the use case, for the Training-Aware Experiment. - -#### DATA - -For all Sparsify Experiments, you will need to provide a dataset to create a sparse model. -Due to the varied ML pipelines and implementations, Sparsify standardizes on a few, popular formats for datasets. -Confirm that your data is formatted properly according to the standards listed below. - -##### Predefined Use Cases - -Training-Aware Experiments utilize specific dataset standards depending on the use case. -Each one is listed below with an example. - -##### Image Classification - -For image classification tasks, Sparsify relies on the dataset format standard used by the PyTorch ImageFolder class. -This format is fairly simple and intuitive, and it is also widely used in the machine-learning community. - -##### Specifications - -- The root folder should contain subdirectories, each representing a single class of images. -- Images of a particular class/category should be placed inside the corresponding subdirectory. -- The subdirectory name is used as the class label and should be unique for each class. -- The images should be in a format readable by the Python Imaging Library (PIL), which includes formats such as .jpeg, .png, .bmp, etc. -- Images do not need to be of the same size. 
- -The PyTorch ImageFolder class automatically assigns numerical class labels to the images based on the lexicographical order of their class directories. -Therefore, it is crucial to ensure the directories are properly named to avoid any confusion or mislabeling. - -##### Image Classification Example - -For an image classification task involving dogs and cats, the dataset directory should be structured as follows: - -``` -root/dog/xxx.png -root/dog/xxy.png -root/dog/xxz.png - -root/cat/123.png -root/cat/nsa.png -root/cat/asd.png -``` - -In this example, all images within the 'dog' subdirectory will be labeled as 'dog', and all images within the 'cat' subdirectory will be labeled as 'cat'. -The exact filenames ('xxx.png', 'xxy.png', etc.) do not matter; what matters is the directory structure and the directory names. - -By organizing the data in this way, it can be easily read and labeled by the PyTorch ImageFolder class, and thus easily used for training image classification models in Sparsify. - -Note that the class labels ('dog', 'cat') are case-sensitive and the order of the classes would be sorted lexicographically. -Here, 'cat' will be considered class 0, and 'dog' will be class 1, due to alphabetical order. - -##### Object Detection / Image Segmentation - -For object detection and image segmentation tasks, Sparsify supports the dataset format used by YOLOv5. -This format is specifically designed for tasks involving bounding boxes and segmentation masks and is widely adopted in the community. - -##### Specifications - -- Images should be stored in a common directory, generally named `images`. -- Annotations for the images should be stored in a separate directory, often named `labels`. -- Images can be in formats readable by OpenCV (e.g. .jpg, .png). -- Each image should have a corresponding annotation file. The annotation files should be in plain text format (.txt). 
-- The name of the annotation file should be the same as the corresponding image file, except with a .txt extension. -- Annotation files for object detection should contain one line for each object in the image. Each line should be in the format: ` `, where the values are normalized relative to the size of the image. -- Annotation files for image segmentation should contain information about the segmentation masks. - -##### Object Detection / Image Segmentation Example - -For an object detection task involving detecting cars and pedestrians, the dataset directory should be structured as follows: - -``` -dataset/ -├── images/ -│ ├── image1.jpg -│ └── image2.jpg -└── labels/ - ├── image1.txt - └── image2.txt -``` - -For `image1.jpg`, if there's a car and a pedestrian in the image, the corresponding `image1.txt` file could look like this: - -``` -0 0.5 0.6 0.2 0.3 -1 0.7 0.8 0.1 0.2 -``` - -This would mean that there is an object of class 0 (car) centered at (50% of the image width, 60% of the image height) and having a width of 20% of the image width and a height of 30% of the image height. -The second line is similar but for an object of class 1 (pedestrian). - -For image segmentation, the labels might be more complex, including segmentation masks that indicate which pixels belong to which object category. - -Make sure the class labels are consistent with what is expected by the YOLOv5 configuration you are using, and that the bounding box coordinates are normalized as described above. - -##### Natural Language (NLP/NLG) - -For natural language processing (NLP) and natural language generation (NLG) tasks, Sparsify supports the dataset formats used by the Hugging Face library. -Hugging Face datasets can be represented in various file formats including JSON, CSV, and JSON lines format (.jsonl). - -##### Specifications - -- Each row or line in your data file should represent a single example. -- The data must include the features necessary for your task. 
For example, a dataset for text classification might include 'text' and 'label' fields. -- For JSON files, each line should be a separate, self-contained JSON object. -- For CSV files, the first row should include the column names, and each subsequent row should include the fields for a single example. -- The file should be UTF-8 encoded to support a wide range of text inputs. - -##### Natural Language (NLP/NLG) Example - -Here is an example of how you might structure a dataset for a sentiment analysis task: - -If you're using a JSON lines (.jsonl) format, your file could look like this: - -``` -{"text": "I love this movie!", "label": "positive"} -{"text": "This movie was awful.", "label": "negative"} -{"text": "I have mixed feelings about this film.", "label": "neutral"} -``` - -Each line is a separate JSON object, representing a single example. - -If you are using a CSV format, your file could look like this: - -``` -text,label -"I love this movie!","positive" -"This movie was awful.","negative" -"I have mixed feelings about this film.","neutral" -``` - -The first row contains the column names, and each subsequent row represents a single example. - -Whether you choose to use JSON lines or CSV will depend on your specific needs and preferences, but either format will work well with Hugging Face and Sparsify. -Make sure your data is formatted correctly according to these specifications to ensure it can be used in your experiments. - -##### Custom Use Cases -Currently, custom use cases are not supported for dataset representation and datasets must conform to the definitions above. In the near future, these will be supported through plugin specifications. - -For full details on Sparsify datasets, read the [Sparsify Datasets Guide](https://github.com/neuralmagic/sparsify/blob/main/docs/datasets-guide.md#sparsify-datasets-guide). - -#### OPTIM_LEVEL - -When using Sparsify, the optim (sparsification) level is one of the top arguments you should decide on. 
Specifically, it controls how much sparsification is applied to your model with higher values resulting in faster and more compressed models. At the max range, though, you may see a drop in accuracy. - -The optim level can be set anywhere from 0.0 to 1.0, where 0.0 is for no sparsification and 1.0 is for maximum sparsification. -0.5 is the default optim level and is a good starting point for most use cases. - -##### Training-Aware Optim Levels - - -Given that Training-Aware is applied while training, the sparsity ranges are increased as compared to one shot since accuracy recovery is easier at higher sparsities. - -The specific ranges are the following: - -- optim-level == 0.0: no sparsification is applied and the input model is returned as a baseline test case. -- optim-level < 0.3: INT8 quantization of the model (activations and weights) is applied. -- optim-level >= 0.3: unstructured pruning (sparsity) is applied to the weights of the model from 60% for 0.3 to 95% for 1.0 with linear scaling between. - Additionally, INT8 quantization of the model is applied. - -The default of 0.5 will result in a ~70% sparse model with INT8 quantization. - - -### Example Training-Aware Experiment CLI Commands - -Here are code examples of Training-Aware Experiments you may wish to run; pick your use case and start sparsifying with Training-Aware! - -#### Running Training-Aware Experiments - -##### Computer Vision Use Case: - -You have an image classification use case and want to run a Training-Aware Experiment on a dense ResNet-50 model using the imagenette dataset. You want to ensure you have the most sparsity to get the best possible performance and maintain a high level of accuracy. - -You are targeting a balanced model in terms of wanting to get a 6-12x performance boost in latency while also maintaining the high accuracy of the model so that you can confidently deploy the model in production to solve your business case. 
- -You can use a Sparsify Training-Aware Experiment to try and reach your goal. Training-Aware Experiments apply SOTA optimization techniques during training to generate a highly optimized sparse model with very little to no impact on accuracy. Since you want to get the most possible performance speedup in latency and need a high level of accuracy, a Training-Aware Experiment makes the most sense for you for its highly optimized, performant sparsity profile as well as high accuracy profile. - -With all of these considerations in mind, run the following Training-Aware Experiment command to achieve your use case goal: -```bash -sparsify.run training-aware --use-case image_classification --model resnet50 --data imagenette --optim-level 0.5 -``` - - -The output is as follows: - -MARK - -##### NLP Use Case: -You are working on a text classification use case to help classify text reviews received from your customers through your e-commerce website. You have been having slow inference times using the BERT-base model, but have an accurate model that you want to ensure does not take a large hit. - -You are targeting a balanced model, but are targeting a significant 6-12x performance boost in text classification throughput while also maintaining the highest level of accuracy with the model so that you can confidently deploy the model in production to solve your business case. You are focused on improving the throughput of the model to process more requests, but sacrifice as few points in accuracy as possible. - -You are targeting a balanced model in terms of wanting to get a 6-12x performance boost in throughput while losing little to no accuracy so your classifications are actionable. - -You can use a Sparsify Training-Aware Experiment to try and reach your goal. 
Since you want to use the SST2 dataset on BERT-base to get the highest performing model with the lowest accuracy hit, a Training-Aware Experiment makes the most sense for you for its highly optimized, performant sparsity profile as well as high accuracy profile. - -With all of these considerations in mind, run the following Training-Aware Experiment command to achieve your use case goal: - -```bash -sparsify.run training-aware --use-case text_classification --model bert-base --data sst2 --optim-level 0.5 -``` -The output is as follows: -MARK - - -### Training-Aware Cloud Quickstart - -In addition to manually creating commands, you use Sparsify Cloud to generate Sparsify Training-Aware Experiment commands. - -To get started, read the [Sparsify Cloud User Guide](https://github.com/neuralmagic/sparsify/blob/main/docs/cloud-user-guide.md). - - -### Next Steps - -Now that you have successfully run a Training-Aware Experiment, check out the [One-Shot](https://github.com/neuralmagic/sparsify/blob/main/docs/one-shot_experiment-guide.md) and [Sparse-Transfer](https://github.com/neuralmagic/sparsify/blob/main/docs/sparse-transfer_experiment-guide.md) Experiments to target different sparsity profiles. - - -### Resources -To learn more about Sparsify and all of the available pathways outside of Training-Aware Experiments, refer to the [Sparsify README](https://github.com/neuralmagic/sparsify). diff --git a/docs/use-cases-guide.md b/docs/use-cases-guide.md deleted file mode 100644 index b234e039..00000000 --- a/docs/use-cases-guide.md +++ /dev/null @@ -1,57 +0,0 @@ - - -# Sparsify Use Cases Guide - -To use Sparsify, you must specify a use case for all experiments to run. -A use case is the specific task or domain/sub-domain you wish to sparsify a model for, such as image classification, object detection, or text classification. 
-It is used to enable Sparsify to apply the best sparsification techniques for your use case, to automatically package the model for deployment, and, depending on what is run, to load specific pipelines for data loading and training. - -## Use Cases - -The generally supported use cases for Sparsify currently are: -- CV - classification: `cv-classification` -- CV - detection: `cv-detection` -- CV - segmentation: `cv-segmentation` -- NLP - question answering: `nlp-question_answering` -- NLP - text classification: `nlp-text_classification` -- NLP - sentiment analysis: `nlp-sentiment_analysis` -- NLP - token classification: `nlp-token_classification` -- NLP - named entity recognition: `nlp-named_entity_recognition` - -Note, other aliases are recognized for these use cases, such as image-classification for cv-classification. -Sparsify will automatically recognize these aliases and apply the correct use case. - -### Custom Use Cases - -If you wish to use Sparsify for a use case that is not in the list of currently supported use cases, you can use a custom use case for some pathways in Sparsify. -The custom use cases will be saved in Sparsify Cloud for future reuse when run through a supported pathway. -The pathways that support custom use cases are listed below. - -Note that custom use cases will prevent Sparsify from applying known, domain-specific knowledge for the sparsification of your model. -Additionally, it will prevent auto-filling of the pre- and post-processing functions when creating a deployment package. - -#### One-Shot - -For One-Shot Experiments, both the CLIs and APIs always will support custom use cases. -To utilize, run a One-Shot Experiment with `--use-case` set to the desired custom use case. - -### Training-Aware - -For Training-Aware Experiments, custom use cases are only supported with the APIs for custom integrations. -This is because non-custom integrations utilize plugins that correspond to the appropriate use case for training pipelines. 
-To utilize this, ensure that you have a training pipeline ready to go and inject the Sparsify API into the training pipeline with the desired use case passed in as an argument. -More information on this specific pathway will be available in the near future as Sparsify's development progresses. From e25ed31a76921ccb030c1c98c53f06d055b1c12d Mon Sep 17 00:00:00 2001 From: Mark Kurtz Date: Thu, 20 Jul 2023 14:49:00 -0400 Subject: [PATCH 42/47] Update deployment_instructions.md --- src/sparsify/auto/tasks/deployment_instructions.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/sparsify/auto/tasks/deployment_instructions.md b/src/sparsify/auto/tasks/deployment_instructions.md index 34866ace..2b4b61fa 100644 --- a/src/sparsify/auto/tasks/deployment_instructions.md +++ b/src/sparsify/auto/tasks/deployment_instructions.md @@ -78,7 +78,7 @@ The server deployment packages up the model inference and any pre- and post-proc To start the server, run the following command with the appropriate arguments: ​ ```bash -deepsparse.server --task TASK --model_path ./deployment +deepsparse.server --task TASK --model_path ./deployment/model.onnx ``` ​ For more information on the `deepsparse.server` command, see the [Server Deployment Guide](https://github.com/neuralmagic/deepsparse/blob/main/docs/user-guide/deepsparse-server.md). 
From a75c0a2a895c85f66e95e640ab28f0a6d9df13a4 Mon Sep 17 00:00:00 2001 From: Rob Greenberg <100797996+rgreenberg1@users.noreply.github.com> Date: Thu, 20 Jul 2023 15:47:42 -0400 Subject: [PATCH 43/47] Update README.md (#271) --- README.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index b2a3b0cb..fa09db43 100644 --- a/README.md +++ b/README.md @@ -197,7 +197,7 @@ tar -xzf sst2_calibration.tar.gz sparsify.run one-shot --use-case text_classification --model "zoo:nlp/sentiment_analysis/bert-base/pytorch/huggingface/sst2/base-none" --data ./sst2_calibration --optim-level 0.5 ``` -To dive deeper into One-Shot Experiments, read through the [One-Shot Experiment Guide](https://github.com/neuralmagic/sparsify/blob/main/docs/one-shot_experiment-guide.md). +To dive deeper into One-Shot Experiments, read through the [One-Shot Experiment Guide](https://github.com/neuralmagic/sparsify/blob/main/docs/one-shot-experiment-guide.md). Note, One-Shot Experiments currently require the model to be in an ONNX format and the dataset to be in a NumPy format. @@ -227,7 +227,7 @@ Or, to sparse transfer a SparseZoo model to the SST2 dataset for sentiment analy sparsify.run sparse-transfer --use-case text_classification --data sst2 --optim-level 0.5 ``` -To dive deeper into Sparse-Transfer Experiments, read through the [Sparse-Transfer Experiment Guide](https://github.com/neuralmagic/sparsify/blob/main/docs/sparse-transfer_experiment-guide.md). +To dive deeper into Sparse-Transfer Experiments, read through the [Sparse-Transfer Experiment Guide](https://github.com/neuralmagic/sparsify/blob/main/docs/sparse-transfer-experiment-guide.md). Note, Sparse-Transfer Experiments require the model to be saved in a PyTorch format corresponding to the underlying integration such as Ultralytics YOLOv5 or Hugging Face Transformers. 
@@ -258,7 +258,7 @@ Or, to sparsify a BERT model on the SST2 dataset for sentiment analysis, run the sparsify.run training-aware --use-case text_classification --model "zoo:nlp/sentiment_analysis/bert-base/pytorch/huggingface/sst2/base-none" --data sst2 --optim-level 0.5 ``` -To dive deeper into Training-Aware Experiments, read through the [Training-Aware Experiment Guide](https://github.com/neuralmagic/sparsify/blob/main/docs/training-aware_experiment-guide.md). +To dive deeper into Training-Aware Experiments, read through the [Training-Aware Experiment Guide](https://github.com/neuralmagic/sparsify/blob/main/docs/training-aware-experiment-guide.md). Note that Training-Aware Experiments require the model to be saved in a PyTorch format corresponding to the underlying integration such as Ultralytics YOLOv5 or Hugging Face Transformers. From e1c50880315f07de6315ce4a55b86699ae531662 Mon Sep 17 00:00:00 2001 From: Dipika Sikka Date: Fri, 21 Jul 2023 09:48:58 -0400 Subject: [PATCH 44/47] move empty directory removal to sparseml instead of sparsify (#273) --- src/sparsify/auto/tasks/image_classification/runner.py | 5 ----- 1 file changed, 5 deletions(-) diff --git a/src/sparsify/auto/tasks/image_classification/runner.py b/src/sparsify/auto/tasks/image_classification/runner.py index 7fcc30af..665c1954 100644 --- a/src/sparsify/auto/tasks/image_classification/runner.py +++ b/src/sparsify/auto/tasks/image_classification/runner.py @@ -77,11 +77,6 @@ def config_to_args( if "dataset" not in config.kwargs: # custom datasets are set to imagefolder config.kwargs["dataset"] = "imagefolder" - if not os.path.exists(config.dataset): - raise FileNotFoundError( - f"The custom dataset {config.dataset} " - "does not exist. Please ensure that the path provided is correct." 
- ) if "model_tag" not in config.kwargs: config.kwargs["model_tag"] = "sparsify_auto_image_classification" From 71c031d2726d8def6bbc0bc2755d2cd9123991d1 Mon Sep 17 00:00:00 2001 From: Rahul Tuli Date: Tue, 25 Jul 2023 10:09:31 -0400 Subject: [PATCH 45/47] Update link in deployment_instructions.md (#275) Update buddy.jpeg link to a downloadable link --- src/sparsify/auto/tasks/deployment_instructions.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/sparsify/auto/tasks/deployment_instructions.md b/src/sparsify/auto/tasks/deployment_instructions.md index 2b4b61fa..be432b82 100644 --- a/src/sparsify/auto/tasks/deployment_instructions.md +++ b/src/sparsify/auto/tasks/deployment_instructions.md @@ -66,7 +66,7 @@ custom_pipeline = CustomTaskPipeline( ​ scores, probs = custom_pipeline("buddy.jpeg") ``` -(Note: Download [buddy.jpeg](https://github.com/neuralmagic/deepsparse/blob/main/tests/deepsparse/pipelines/sample_images/buddy.jpeg)) +(Note: Download [buddy.jpeg](https://raw.githubusercontent.com/neuralmagic/deepsparse/main/tests/deepsparse/pipelines/sample_images/buddy.jpeg)) ​ For more information on the available pipelines and how to create custom pipelines, see the [Pipeline Deployment Guide](https://github.com/neuralmagic/deepsparse/blob/main/docs/user-guide/deepsparse-benchmarking.md). From e3133b82b24ad7fdc2d1f00ff08b4cb7756bfdd8 Mon Sep 17 00:00:00 2001 From: Rahul Tuli Date: Thu, 27 Jul 2023 10:25:23 -0400 Subject: [PATCH 46/47] Update to python3.8 (#276) --- .github/ISSUE_TEMPLATE/bug_report.md | 2 +- DEVELOPING.md | 2 +- setup.py | 4 ++-- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/.github/ISSUE_TEMPLATE/bug_report.md b/.github/ISSUE_TEMPLATE/bug_report.md index dd9376a1..aa965688 100644 --- a/.github/ISSUE_TEMPLATE/bug_report.md +++ b/.github/ISSUE_TEMPLATE/bug_report.md @@ -14,7 +14,7 @@ A clear and concise description of what you expected to happen. **Environment** Include all relevant environment information: 1. 
OS [e.g. Ubuntu 18.04]: -2. Python version [e.g. 3.7]: +2. Python version [e.g. 3.8]: 3. Sparsify version or commit hash [e.g. 0.1.0, `f7245c8`]: 4. ML framework version(s) [e.g. torch 1.7.1]: 5. Other Python package versions [e.g. SparseZoo, DeepSparse, numpy, ONNX]: diff --git a/DEVELOPING.md b/DEVELOPING.md index 25d27350..0fd7d95f 100644 --- a/DEVELOPING.md +++ b/DEVELOPING.md @@ -16,7 +16,7 @@ limitations under the License. # Developing Sparsify -Sparsify is developed and tested using Python 3.7-3.9. +Sparsify is developed and tested using Python 3.8-3.9. To develop Sparsify, you will also need the development dependencies and to follow the styling guidelines. Here's some details to get started. diff --git a/setup.py b/setup.py index 83cd1d8d..57d161dc 100644 --- a/setup.py +++ b/setup.py @@ -114,12 +114,12 @@ def _setup_long_description() -> Tuple[str, str]: install_requires=_setup_install_requires(), extras_require=_setup_extras(), entry_points=_setup_entry_points(), - python_requires=">=3.7.0", + python_requires=">=3.8.0", classifiers=[ "Development Status :: 5 - Production/Stable", "Programming Language :: Python :: 3", "Programming Language :: Python :: 3 :: Only", - "Programming Language :: Python :: 3.7", + "Programming Language :: Python :: 3.8", "Programming Language :: Python :: 3.8", "Programming Language :: Python :: 3.9", "Intended Audience :: Developers", From 7d5b20dc8dfdbbbbd27958afa2b59e3ac850c9d8 Mon Sep 17 00:00:00 2001 From: Dipika Sikka Date: Thu, 24 Aug 2023 12:36:01 -0400 Subject: [PATCH 47/47] LLM Finetune Functionality (#277) * initial llmfoundry finetune functionality * update docstring with samples * fix quality * refactor to use TaskRunner; add ddp support * add trainhook for single gpu/cpu run * add enum for llm datatypes, use task info for finetune pathways, update docstring * add click for arguments, add finetune args, update entrypoints * add try/except around imports * quality * PR comments * LLM finetune sparsify masking 
(#278) * add functions to mask weights during finetuneing * update logic for loading weights * update yaml * update mask name * add logic to update batchsize based on gpu count * make sparsify requirements less broad; move sparseml[transformers] to nm deps * remove flash-attn * quality --- MANIFEST.in | 1 + setup.py | 20 +- .../samples/finetune_llmfoundry_sample.yaml | 134 +++++++ src/sparsify/auto/scripts/main.py | 18 +- src/sparsify/auto/tasks/finetune/__init__.py | 26 ++ src/sparsify/auto/tasks/finetune/args.py | 34 ++ src/sparsify/auto/tasks/finetune/finetune.py | 372 ++++++++++++++++++ src/sparsify/auto/tasks/finetune/helpers.py | 62 +++ src/sparsify/auto/tasks/finetune/runner.py | 75 ++++ src/sparsify/auto/tasks/runner.py | 16 +- .../auto/tasks/transformers/__init__.py | 12 + src/sparsify/auto/utils/error_handler.py | 2 +- src/sparsify/schemas/auto_api.py | 4 +- src/sparsify/utils/constants.py | 6 + 14 files changed, 766 insertions(+), 16 deletions(-) create mode 100644 src/sparsify/auto/samples/finetune_llmfoundry_sample.yaml create mode 100644 src/sparsify/auto/tasks/finetune/__init__.py create mode 100644 src/sparsify/auto/tasks/finetune/args.py create mode 100644 src/sparsify/auto/tasks/finetune/finetune.py create mode 100644 src/sparsify/auto/tasks/finetune/helpers.py create mode 100644 src/sparsify/auto/tasks/finetune/runner.py diff --git a/MANIFEST.in b/MANIFEST.in index 42d20225..daded415 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -1,3 +1,4 @@ recursive-include src/sparsify/ui/ * include LICENSE include src/sparsify/auto/tasks/deployment_instructions.md +include src/sparsify/auto/samples/finetune_llmfoundry_sample.yaml diff --git a/setup.py b/setup.py index 57d161dc..3c38dfdb 100644 --- a/setup.py +++ b/setup.py @@ -26,11 +26,10 @@ # load and overwrite version and release info from sparseml package exec(open(os.path.join("src", "sparsify", "version.py")).read()) print(f"loaded version {version} from src/sparsify/version.py") -version_nm_deps = 
f"{version_major_minor}.0" +version_nm_deps = f"{version_major_minor}.0.202308" _PACKAGE_NAME = "sparsify" if is_release else "sparsify-nightly" - _deps = [ "pydantic>=1.8.2,<2.0.0", "pyyaml>=5.0.0", @@ -39,13 +38,14 @@ "setuptools>=56.0.0", "optuna>=3.0.2", "onnxruntime-gpu", -] -_nm_deps = [ f"{'sparsezoo' if is_release else 'sparsezoo-nightly'}~={version_nm_deps}", - f"{'sparseml' if is_release else 'sparseml-nightly'}[torchvision,transformers,yolov5]~={version_nm_deps}", # noqa E501 f"{'deepsparse' if is_release else 'deepsparse-nightly'}~={version_nm_deps}", + f"{'sparseml' if is_release else 'sparseml-nightly'}[torchvision,yolov5]~={version_nm_deps}", # noqa E501 ] +_nm_deps = [ + f"{'sparseml' if is_release else 'sparseml-nightly'}[transformers]~={version_nm_deps}", # noqa E501 +] _dev_deps = [ "black>=20.8b1", @@ -56,6 +56,11 @@ "fastai>=2.7.7", ] +_llm_deps = [ + "llm-foundry==0.2.0", + f"{'nm-transformers' if is_release else 'nm-transformers-nightly'}", +] + def _setup_packages() -> List: return find_packages( @@ -68,11 +73,11 @@ def _setup_package_dir() -> Dict: def _setup_install_requires() -> List: - return _nm_deps + _deps + return _deps def _setup_extras() -> Dict: - return {"dev": _dev_deps} + return {"dev": _dev_deps, "_nm_deps": _nm_deps, "llm": _llm_deps} def _setup_entry_points() -> Dict: @@ -81,6 +86,7 @@ def _setup_entry_points() -> Dict: "sparsify.run=sparsify.cli.run:main", "sparsify.login=sparsify.login:main", "sparsify.check_environment=sparsify.check_environment.main:main", + "finetune=sparsify.auto.tasks.finetune.finetune:parse_args_and_run", ] } diff --git a/src/sparsify/auto/samples/finetune_llmfoundry_sample.yaml b/src/sparsify/auto/samples/finetune_llmfoundry_sample.yaml new file mode 100644 index 00000000..6b2f0c1d --- /dev/null +++ b/src/sparsify/auto/samples/finetune_llmfoundry_sample.yaml @@ -0,0 +1,134 @@ +max_seq_len: 2048 +global_seed: 17 +model_name_or_path: mosaicml/mpt-7b-instruct +load_path: 
/storage/dsikka/mpt_7b_instruct_oneshot_sp70.pt +precision: amp_bf16 + +max_duration: 1ep +eval_interval: 1ep +# eval_subset_num_batches: 3 # use this for quick testing +eval_first: true +seed: ${global_seed} + +global_train_batch_size: 1 +# for mpt-7b dense: +# 4 x A100_80GB = "device_train_microbatch_size: 12" +# 8 x A6000_48GB = "device_train_microbatch_size: 6" + +# for mpt-7b sparse (with masks): +# 8 x A6000_48GB = "device_train_microbatch_size: 4" +device_train_batch_size: 1 +device_train_microbatch_size: 1 +device_eval_batch_size: 1 + +# Run Name +run_name: test_run + +model: + name: hf_causal_lm + pretrained: true + pretrained_model_name_or_path: mosaicml/mpt-7b-instruct + max_seq_len: ${max_seq_len} + config_overrides: + attn_config: + attn_impl: torch + # Set this to `true` if using `train_loader.dataset.packing_ratio` below + attn_uses_sequence_id: true + +# Tokenizer +tokenizer: + name: EleutherAI/gpt-neox-20b + kwargs: + model_max_length: ${max_seq_len} + +# Dataloaders +train_loader: + name: finetuning + dataset: + hf_name: mosaicml/dolly_hhrlhf + split: train + max_seq_len: ${max_seq_len} + allow_pad_trimming: false + decoder_only_format: true + # # Use `python llmfoundry/data/packing.py --yaml-path /path/to/this/yaml/ ...` + # # to profile this run's optimal packing_ratio as it depends on GPU count, + # # batch size, sequence length + packing_ratio: 13 # padding=0.36%, waste=0.79% + shuffle: true + drop_last: false + num_workers: 8 + pin_memory: false + prefetch_factor: 2 + persistent_workers: true + timeout: 0 + +eval_loader: + name: finetuning + dataset: + hf_name: mosaicml/dolly_hhrlhf + split: test + max_seq_len: ${max_seq_len} + allow_pad_trimming: false + decoder_only_format: true + packing_ratio: 13 + shuffle: false + drop_last: false + num_workers: 8 + pin_memory: false + prefetch_factor: 2 + persistent_workers: true + timeout: 0 + +# Optimization +scheduler: + name: linear_decay_with_warmup + t_warmup: 20ba + alpha_f: 0 + +optimizer: + 
name: decoupled_adamw + lr: 1e-4 + betas: + - 0.9 + - 0.999 + eps: 1.0e-8 + weight_decay: 0.0 + +# we can't use gradient clipping for sparse training runs because we don't have +# a way to mask gradients of pruned weights, and thus the global gradient norm +# will be incorrect +# algorithms: +# gradient_clipping: +# clipping_type: norm +# clipping_threshold: 1.0 + +# FSDP +fsdp_config: + sharding_strategy: FULL_SHARD + mixed_precision: FULL + activation_checkpointing: true + activation_checkpointing_reentrant: false + activation_cpu_offload: false + limit_all_gathers: true + verbose: false + +# Logging +progress_bar: false +log_to_console: true +console_log_interval: 1ba + +callbacks: + speed_monitor: + window_size: 10 + lr_monitor: {} + memory_monitor: {} + runtime_estimator: {} + +loggers: + tensorboard: {} + +# Checkpoint to local filesystem or remote object store +save_interval: 1ep +save_num_checkpoints_to_keep: 1 # Important, this cleans up checkpoints saved to DISK +save_folder: output_dir/{run_name}/checkpoints +save_overwrite: true \ No newline at end of file diff --git a/src/sparsify/auto/scripts/main.py b/src/sparsify/auto/scripts/main.py index 0cadd25c..365c8e7b 100644 --- a/src/sparsify/auto/scripts/main.py +++ b/src/sparsify/auto/scripts/main.py @@ -25,6 +25,7 @@ ) from sparsify.schemas import APIArgs from sparsify.schemas.auto_api import SparsificationTrainingConfig +from sparsify.utils import get_task_info from tensorboard.program import TensorBoard from tensorboard.util import tb_logging @@ -42,6 +43,18 @@ def main(api_args: APIArgs): deploy_directory, ) = create_save_directory(api_args) + if api_args.task in get_task_info("finetune").aliases: + _LOGGER.info( + "Running finetuning. 
" + "Currently only arguments passed for use-case and data will be considered" + ) + config = SparsificationTrainingConfig( + task=api_args.task, dataset=api_args.dataset, base_model=None, recipe=None + ) + runner = TaskRunner.create(config) + runner.train(train_directory=train_directory, log_directory=log_directory) + return + _suppress_tensorboard_logs() # Launch tensorboard server @@ -51,16 +64,17 @@ def main(api_args: APIArgs): _LOGGER.info(f"TensorBoard listening on {url}") # Request config from api and instantiate runner + raw_config = api_request_config(api_args) config = SparsificationTrainingConfig(**raw_config) - runner = TaskRunner.create(config) + runner = TaskRunner.create(config) # Execute integration run and return metrics metrics = runner.train(train_directory=train_directory, log_directory=log_directory) + yaml.safe_dump( metrics.dict(), (Path(train_directory).parent / "metrics.yaml").open("w") ) - runner.export(model_directory=train_directory) runner.create_deployment_directory( train_directory=train_directory, deploy_directory=deploy_directory diff --git a/src/sparsify/auto/tasks/finetune/__init__.py b/src/sparsify/auto/tasks/finetune/__init__.py new file mode 100644 index 00000000..c3b6bcb6 --- /dev/null +++ b/src/sparsify/auto/tasks/finetune/__init__.py @@ -0,0 +1,26 @@ +# Copyright (c) 2021 - present / Neuralmagic, Inc. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +# flake8: noqa + +from .args import * + + +try: + from .finetune import * + from .runner import * +except ImportError as exception: + raise ImportError( + "To use the llm finetuning pathway, please install sparsify[llm]" + ) from exception diff --git a/src/sparsify/auto/tasks/finetune/args.py b/src/sparsify/auto/tasks/finetune/args.py new file mode 100644 index 00000000..7e8e3389 --- /dev/null +++ b/src/sparsify/auto/tasks/finetune/args.py @@ -0,0 +1,34 @@ +# Copyright (c) 2021 - present / Neuralmagic, Inc. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from pydantic import Field +from sparsify.auto.tasks import BaseArgs + + +__all__ = ["FineTuneTrainArgs"] + + +class FineTuneTrainArgs(BaseArgs): + yaml: str = Field( + default=None, + description="path to the training yaml", + ) + checkpoints: str = Field( + default=None, + description="path to the directory to store checkpoints", + ) + logging: str = Field( + default=None, + description="path to store logs", + ) diff --git a/src/sparsify/auto/tasks/finetune/finetune.py b/src/sparsify/auto/tasks/finetune/finetune.py new file mode 100644 index 00000000..ce113d81 --- /dev/null +++ b/src/sparsify/auto/tasks/finetune/finetune.py @@ -0,0 +1,372 @@ +# Copyright (c) 2021 - present / Neuralmagic, Inc. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import logging
+import os
+from enum import Enum
+from pathlib import Path
+from typing import Dict, Tuple, Union
+
+import torch
+from torch.utils.data import DataLoader
+
+import click
+from composer import Trainer
+from composer.core import Evaluator
+from composer.models import HuggingFaceModel
+from composer.utils import dist, get_device, reproducibility
+from llmfoundry import (
+    COMPOSER_MODEL_REGISTRY,
+    build_finetuning_dataloader,
+    build_text_denoising_dataloader,
+)
+from llmfoundry.data.text_data import build_text_dataloader
+from llmfoundry.utils.builders import (
+    build_logger,
+    build_optimizer,
+    build_scheduler,
+    build_tokenizer,
+)
+from llmfoundry.utils.config_utils import update_batch_size_info
+from omegaconf import DictConfig
+from omegaconf import OmegaConf as om
+from sparsify.auto.tasks.finetune.helpers import MaskPrunedWeights, attach_masks
+from transformers import PreTrainedTokenizerBase
+
+
+__all__ = ["FineTuner"]
+
+TEXT_DENOISING_MODELS = ["hf_prefix_lm", "hf_t5"]
+TEXT_MODELS = ["hf_causal_lm"]
+
+_LOGGER = logging.getLogger(__name__)
+_LOGGER.setLevel(logging.INFO)
+
+
+class LLMDataTypes(Enum):
+    TEXT = "text"
+    TEXT_DENOISING = "text_denoising"
+    FINETUNING = "finetuning"
+
+
+class FineTuner:
+
+    """
+    LLMFinetuner which allows finetuning of LLM Models using llmfoundry. Finetuning is
+    heavily dependent on providing a llmfoundry-compliant yaml file which sets up
+    the training, including which pretrained model to pull as well as the data that is
+    to be used for finetuning. 
Please see the example yaml under samples or the + llmfoundry repo for additional examples: + https://github.com/mosaicml/llm-foundry/blob/main/scripts/train/finetune_example/ + """ + + def __init__( + self, + dataset_path: Union[str, Path], + train_directory: Union[str, Path], + log_dir: Union[str, Path], + ) -> None: + """ + :param dataset_path: path to the llmfoundry compliant yaml file + :param train_directory: path to log the checkpoints for the model + :param log_dir: path to store the specified logger (such as tensorboard) + + """ + if os.path.exists(dataset_path): + if Path(dataset_path).suffix not in [".yaml", ".yml"]: + raise RuntimeError( + "LLMFinetuner expects a yaml file compliant with llmfoundry." + ) + with open(dataset_path) as yaml_file: + self._train_config = om.load(yaml_file) + else: + raise FileNotFoundError( + f"{dataset_path} does not exist. Plase ensure " + " the yaml file exists and the path provided is correct." + ) + + if self._train_config.get("loggers"): + for _, log_config in self._train_config["loggers"].items(): + if "log_dir" in log_config: + log_config["log_dir"] = os.path.join(log_dir, log_config["log_dir"]) + else: + log_config["log_dir"] = log_dir + + self._train_config.save_folder = os.path.join( + train_directory, Path(self._train_config.save_folder) + ) + self._model_name = self._train_config["model"]["name"] + self._validate_yaml() + + @property + def model_name(self) -> str: + """ + :return: model name for the LLM + """ + return self._model_name + + def _validate_yaml(self): + """ + Validate that the provided yaml is compatible with llmfoundry. 
+ """ + if not self._train_config.get("train_loader"): + raise ValueError( + "the provided config file is missing details on the train_loader" + ) + + data_loaders = [self._train_config.get("train_loader")] + if self._train_config.get("eval_loader"): + data_loaders.append(self._train_config.get("eval_loader")) + + for loader in data_loaders: + if loader["name"] == LLMDataTypes.TEXT.value: + if self.model_name in TEXT_DENOISING_MODELS: + raise ValueError( + f"Model type {self.model_name} is not supported " + " for text dataloaders. Please use the " + " text_denoising dataloader." + ) + elif loader["name"] == LLMDataTypes.TEXT_DENOISING.value: + if self.model_name in TEXT_MODELS: + raise ValueError( + f"Model type {self.model_name} is not supported " + " for text_denoising dataloaders. Please use the " + " text dataloader." + ) + + def _build_model(self, tokenizer: PreTrainedTokenizerBase) -> HuggingFaceModel: + """ + Based on the model name, pull and return the pretrained hugging face model. + + :param tokenizer: transformers tokenizer + :return: HuggingFaceModel from the mosaicml composer library + """ + if self.model_name not in COMPOSER_MODEL_REGISTRY: + raise ValueError( + "Please ensure the model name provided is one of " + f" {list(COMPOSER_MODEL_REGISTRY.keys())}" + ) + return COMPOSER_MODEL_REGISTRY[self.model_name]( + self._train_config.model, tokenizer + ) + + def _load_weights_and_attach_masks( + self, tokenizer: PreTrainedTokenizerBase + ) -> Tuple[torch.nn.Module, Union[None, "MaskPrunedWeights"]]: + """ + If a load_path is provided, attempt to load in weights from the specified + location. Because the mask may be sparse, attach masks, masking where the + weights have already been pruned. + + :return: tuple including the model with weights loaded from the `load_path` + and with buffers attached for pruning masks. Also returns the MaskPrunedWeights + algorithm. 
+ """ + model = self._build_model(tokenizer) + try: + model.load_state_dict( + torch.load(self._train_config.get("load_path"), map_location="cpu")[ + "state" + ]["model"], + strict=True, + ) + except Exception as e: + _LOGGER.error(f" Failed to load weights. Returning pretrained model {e}") + if self._train_config.model.pretrained is False: + self._train_config.model.pretrained = True + model = self._build_model(tokenizer) + return model, None + + attach_masks(model) + return model, MaskPrunedWeights() + + def _build_dataloaders( + self, + dataloader_config: DictConfig, + tokenizer: PreTrainedTokenizerBase, + device_batch_size: int, + ) -> DataLoader: + """ + Build a torch dataloader given a DictConfig containing details about the + dataloader, the tokenizer that is to be applied to the data, and the batch size + for the dataloader. + + :param dataloader_config DictConfig from the omegaconf library, containing + details on the dataloader + :param tokenizer: transformers tokenizer + :param device_batch_size: batch size for the dataloader + :return: a torch DataLoader + """ + if dataloader_config.name == LLMDataTypes.TEXT.value: + return build_text_dataloader( + dataloader_config, + tokenizer, + device_batch_size, + ) + elif dataloader_config.name == LLMDataTypes.TEXT_DENOISING.value: + return build_text_denoising_dataloader( + dataloader_config, + tokenizer, + device_batch_size, + ) + elif dataloader_config.name == LLMDataTypes.FINETUNING.value: + return build_finetuning_dataloader( + dataloader_config, + tokenizer, + device_batch_size, + ) + + def _get_fsdp_config(self) -> Union[Dict, None]: + """ + Fetch the fsdp configuration. If <= one gpu devices are available, fsdp is + turned off. 
+ + :return: fsdp dictionary if number of cuda devices available is > one, else None + """ + fsdp_config = self._train_config.get("fsdp_config", None) + fsdp_config = ( + om.to_container(fsdp_config, resolve=True) if fsdp_config else None + ) + + if dist.get_world_size() <= 1: + fsdp_config = None + + return fsdp_config + + def _build_trainer(self) -> Trainer: + """ + Build the trainer object. This involves loading the pretrained model, fetching + the tokenizer, and setting up the dataloaders, optimizer, and scheduler. + + :return: mosaicml composer Trainer object + """ + reproducibility.seed_all(self._train_config.seed) + if dist.get_world_size() > 1: + dist.initialize_dist(get_device(None)) + + self._train_config = update_batch_size_info(self._train_config) + + tokenizer = build_tokenizer(self._train_config.tokenizer) + + algorithms = [] + # If a load_path is provided, try loading weights from the provided path + if self._train_config.get("load_path"): + self._train_config.model.pretrained = False + else: + self._train_config.model.pretrained = True + + model, algorithm = self._load_weights_and_attach_masks(tokenizer) + if algorithm: + algorithms.append(algorithm) + + optimizer = build_optimizer(self._train_config.optimizer, model) + scheduler = build_scheduler(self._train_config.scheduler) + + loggers = [ + build_logger(name, logger_cfg) + for name, logger_cfg in (self._train_config.get("loggers") or {}).items() + ] + + train_loader = self._build_dataloaders( + self._train_config.train_loader, + tokenizer, + self._train_config.device_train_batch_size, + ) + eval_loader = Evaluator( + label="eval", + dataloader=self._build_dataloaders( + self._train_config.eval_loader, + tokenizer, + self._train_config.device_eval_batch_size, + ), + metric_names=list(model.train_metrics.keys()), + ) + + trainer = Trainer( + run_name=self._train_config.run_name, + model=model, + train_dataloader=train_loader, + eval_dataloader=[eval_loader], + optimizers=optimizer, + 
schedulers=scheduler,
+            loggers=loggers,
+            algorithms=algorithms,
+            max_duration=self._train_config.max_duration,
+            eval_interval=self._train_config.eval_interval,
+            precision=self._train_config.precision,
+            fsdp_config=self._get_fsdp_config(),
+            save_folder=self._train_config.save_folder,
+            eval_subset_num_batches=self._train_config.get(
+                "eval_subset_num_batches", -1
+            ),
+            log_to_console=self._train_config.get("log_to_console", False),
+            progress_bar=self._train_config.get("progress_bar", True),
+            console_log_interval=self._train_config.get("console_log_interval", "1ba"),
+            device_train_microbatch_size=self._train_config.get(
+                "device_train_microbatch_size", "auto"
+            ),
+            save_filename=self._train_config.get(
+                "save_filename", "ep{epoch}-ba{batch}-rank{rank}.pt"
+            ),
+            save_latest_filename=self._train_config.get(
+                "save_latest_filename", "latest-rank{rank}.pt"
+            ),
+            save_interval=self._train_config.get("save_interval", "1000ba"),
+            save_num_checkpoints_to_keep=self._train_config.get(
+                "save_num_checkpoints_to_keep", 1
+            ),
+            save_overwrite=self._train_config.get("save_overwrite", False),
+            autoresume=self._train_config.get("autoresume", False),
+            dist_timeout=self._train_config.get("dist_timeout", 600.0),
+        )
+        return trainer
+
+    def fine_tune(self):
+        """
+        Run finetuning using the trainer object. Finetuned models will be checkpointed
+        to the configured directory.
+        """
+        trainer = self._build_trainer()
+        trainer.fit()
+
+
+@click.command()
+@click.option("--yaml", default=None, type=str, help="Path to the training yaml")
+@click.option(
+    "--checkpoints",
+    default=None,
+    type=str,
+    help="Path to directory to store checkpoints",
+)
+@click.option("--logging", default=None, type=str, help="Path to store log")
+def parse_args_and_run(
+    yaml: Union[str, Path],
+    checkpoints: Union[str, Path],
+    logging: Union[str, Path],
+):
+    """
+    Serves as the entrypoint for ddp LLM finetuning. 
+ + :param yaml: path to the llmfoundry compliant yaml file + :param checkpoints: path to log the checkpoints for the model + :param logging: path to store the specified logger (such as tensorboard) + """ + finetuner = FineTuner(yaml, checkpoints, logging) + finetuner.fine_tune() + + +# train_hook +def main(**kwargs): + finetuner = FineTuner(kwargs["yaml"], kwargs["checkpoints"], kwargs["logging"]) + finetuner.fine_tune() diff --git a/src/sparsify/auto/tasks/finetune/helpers.py b/src/sparsify/auto/tasks/finetune/helpers.py new file mode 100644 index 00000000..b0bfaa47 --- /dev/null +++ b/src/sparsify/auto/tasks/finetune/helpers.py @@ -0,0 +1,62 @@ +# Copyright (c) 2021 - present / Neuralmagic, Inc. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import torch + +from composer.core import Algorithm, Event + + +all = ["attach_masks", "MaskPrunedWeights"] + + +class MaskPrunedWeights(Algorithm): + """ + Composer specific hook which allows us to mask weights after a specific event, + in this case at the end of the batch. Provided as input to the Trainer while + finetuning. 
Note: can also mask weights before the forward pass by adding
+    `or event == Event.BATCH_START`
+    """
+
+    def match(self, event, state):
+        return event == Event.BATCH_END
+
+    @torch.no_grad()
+    def apply(self, event, state, logger):
+        def mask_weights(module):
+            if hasattr(module, "constant_pruning_mask"):
+                module.weight *= module.constant_pruning_mask
+
+        state.model.apply(mask_weights)
+
+
+def attach_masks(model: torch.nn.Module):
+    """
+    Recursively attach masks to weights which have already been pruned to avoid
+    finetuning them further.
+
+    :param model: torch.nn.Module to recursively attach masks to if the weights are
+    already pruned
+    """
+    for _, module in model.named_children():
+        if isinstance(module, torch.nn.Linear):
+            constant_pruning_mask = torch.where(
+                module.weight == 0,
+                torch.tensor(0, dtype=torch.uint8),
+                torch.tensor(1, dtype=torch.uint8),
+            )
+            module.register_buffer(
+                "constant_pruning_mask", constant_pruning_mask, persistent=False
+            )
+        else:
+            attach_masks(module)
diff --git a/src/sparsify/auto/tasks/finetune/runner.py b/src/sparsify/auto/tasks/finetune/runner.py
new file mode 100644
index 00000000..5fe8d06a
--- /dev/null
+++ b/src/sparsify/auto/tasks/finetune/runner.py
@@ -0,0 +1,75 @@
+# Copyright (c) 2021 - present / Neuralmagic, Inc. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License. 
+ +from typing import Tuple + +from pydantic import BaseModel +from sparsify.auto.tasks.finetune.args import FineTuneTrainArgs +from sparsify.auto.tasks.finetune.finetune import main as train_hook +from sparsify.auto.tasks.runner import TaskRunner +from sparsify.auto.utils import HardwareSpecs +from sparsify.schemas import Metrics, SparsificationTrainingConfig +from sparsify.utils import TASK_REGISTRY + + +__all__ = [ + "LLMFinetuner", +] + + +@TaskRunner.register_task(task=TASK_REGISTRY["finetune"]) +class LLMFinetuner(TaskRunner): + """ + TaskRunner for LLM finetuning. Currently set-up as a shell to leverage TaskRunner's + ddp functionality for finetuning. Function definitions will be completed as + functionality is further supported. + """ + + train_hook = staticmethod(train_hook) + export_model_kwarg = "None" + + def __init__(self, config: SparsificationTrainingConfig): + super().__init__(config) + + @classmethod + def config_to_args( + cls, config: SparsificationTrainingConfig + ) -> Tuple[BaseModel, BaseModel]: + train_args = FineTuneTrainArgs(yaml=config.dataset) + + return train_args, None + + def update_run_directory_args(self): + pass + + def _train_completion_check(self) -> bool: + pass + + def _export_completion_check(self) -> bool: + pass + + def _update_train_args_post_failure(self, error_type: Exception): + pass + + def _update_export_args_post_failure(self, error_type: Exception): + pass + + def _get_metrics(self) -> Metrics: + pass + + def _get_default_deployment_directory(self, train_directory: str) -> str: + pass + + def tune_args_for_hardware(self, hardware_specs: HardwareSpecs): + pass diff --git a/src/sparsify/auto/tasks/runner.py b/src/sparsify/auto/tasks/runner.py index 97d465d6..6861d753 100644 --- a/src/sparsify/auto/tasks/runner.py +++ b/src/sparsify/auto/tasks/runner.py @@ -30,7 +30,7 @@ from pydantic import BaseModel from sparsify.auto.utils import ErrorHandler, HardwareSpecs, analyze_hardware from sparsify.schemas import Metrics, 
SparsificationTrainingConfig -from sparsify.utils import TASK_REGISTRY, TaskName +from sparsify.utils import TASK_REGISTRY, TaskName, get_task_info __all__ = [ @@ -55,6 +55,7 @@ "question_answering", "text_classification", "token_classification", + "finetune", ] ] _TASK_RUNNER_IMPLS = {} @@ -270,10 +271,12 @@ def _train_distributed(self): "--nproc_per_node", "auto", f"--master_port={_get_open_port_()}", - self.sparseml_train_entrypoint, ] + if self._config.task in get_task_info("finetune").aliases: + ddp_args += ["finetune"] + else: + ddp_args += [self.sparseml_train_entrypoint] ddp_args += self.train_args.serialize_to_cli_string(self.dashed_cli_kwargs) - launch_ddp(ddp_args) @retry_stage(stage="train") @@ -293,6 +296,10 @@ def train(self, train_directory: str, log_directory: str) -> Metrics: self.log_directory = log_directory self.update_run_directory_args() + if self._config.task in get_task_info("finetune").aliases: + self.train_args.checkpoints = self.run_directory + self.train_args.logging = self.log_directory + if self.use_distributed_training: self._train_distributed() else: @@ -505,7 +512,8 @@ def _dynamically_register_integration_runner(task: str): from sparsify.auto.tasks.image_classification import ( # noqa F401 ImageClassificationRunner, ) - + elif TASK_REGISTRY[task].domain == "llm": + from sparsify.auto.tasks.finetune import LLMFinetuner # noqa F401 else: raise ValueError( f"Task {task} is not yet supported. TaskRunner implementation " diff --git a/src/sparsify/auto/tasks/transformers/__init__.py b/src/sparsify/auto/tasks/transformers/__init__.py index b8794631..a3f3f4ca 100644 --- a/src/sparsify/auto/tasks/transformers/__init__.py +++ b/src/sparsify/auto/tasks/transformers/__init__.py @@ -15,5 +15,17 @@ # flake8: noqa # isort: skip_file + +def _check_nm_install(): + try: + from .runner import * + except ImportError as exception: + raise ImportError( + "Please install sparsify[nm] to use this pathway." 
+ ) from exception + + +_check_nm_install() + from .args import * from .runner import * diff --git a/src/sparsify/auto/utils/error_handler.py b/src/sparsify/auto/utils/error_handler.py index 65bc533c..7240fa68 100644 --- a/src/sparsify/auto/utils/error_handler.py +++ b/src/sparsify/auto/utils/error_handler.py @@ -154,7 +154,7 @@ def raise_exception_summary(self): if all( [ ( - (type(error) == type(first_error)) + (type(error) is type(first_error)) and (error.args == first_error.args) ) for error in self._caught_runtime_errors diff --git a/src/sparsify/schemas/auto_api.py b/src/sparsify/schemas/auto_api.py index d6e94102..c34f68a4 100644 --- a/src/sparsify/schemas/auto_api.py +++ b/src/sparsify/schemas/auto_api.py @@ -160,14 +160,14 @@ class SparsificationTrainingConfig(BaseModel): dataset: str = Field( description="path to the dataset to train the task on", ) - base_model: str = Field( + base_model: Optional[str] = Field( description="path to the model to be sparsified", ) distill_teacher: str = Field( description="optional path to a distillation teacher for training", default="auto", ) - recipe: str = Field( + recipe: Optional[str] = Field( description="file path to or zoo stub of sparsification recipe to be applied", ) recipe_args: Dict[str, Any] = Field( diff --git a/src/sparsify/utils/constants.py b/src/sparsify/utils/constants.py index 3d893325..54771000 100644 --- a/src/sparsify/utils/constants.py +++ b/src/sparsify/utils/constants.py @@ -50,6 +50,12 @@ ] TASK_REGISTRY: Dict[str, TaskName] = { + "finetune": TaskName( + name="finetune", + aliases=["finetuning", "fine tune"], + domain="llm", + sub_domain="language_modeling", + ), "image_classification": TaskName( name="image_classification", aliases=["ic", "classification", "cv_classification"],