Commit 1a5c755

[Kaldi] Update to 21.08

1 parent: 26d8955
30 files changed: +2258, -1195 lines

Kaldi/SpeechRecognition/.gitignore (+1)

@@ -2,3 +2,4 @@ data/*
 !data/README.md
 .*.swp
 .*.swo
+.clang-format

Kaldi/SpeechRecognition/Dockerfile (+85, -21)

@@ -1,4 +1,4 @@
-# Copyright (c) 2019 NVIDIA CORPORATION. All rights reserved.
+# Copyright (c) 2019-2021 NVIDIA CORPORATION. All rights reserved.
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
@@ -11,42 +11,106 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-FROM nvcr.io/nvidia/kaldi:20.03-py3 as kb
-FROM nvcr.io/nvidia/tritonserver:20.03-py3
-ENV DEBIAN_FRONTEND=noninteractive
+ARG TRITONSERVER_IMAGE=nvcr.io/nvidia/tritonserver:21.05-py3
+ARG KALDI_IMAGE=nvcr.io/nvidia/kaldi:21.08-py3
+ARG PYTHON_VER=3.8
+
+#
+# Kaldi shared library dependencies
+#
+FROM ${KALDI_IMAGE} as kaldi
+
+#
+# Builder image based on Triton Server SDK image
+#
+FROM ${TRITONSERVER_IMAGE}-sdk as builder
+ARG PYTHON_VER
 
 # Kaldi dependencies
-RUN apt-get update && apt-get install -y --no-install-recommends \
+RUN set -eux; \
+    apt-get update; \
+    apt-get install -yq --no-install-recommends \
         automake \
         autoconf \
         cmake \
         flac \
         gawk \
         libatlas3-base \
         libtool \
-        python3.6 \
-        python3.6-dev \
+        python${PYTHON_VER} \
+        python${PYTHON_VER}-dev \
         sox \
         subversion \
         unzip \
         bc \
         libatlas-base-dev \
-        zlib1g-dev
+        gfortran \
+        zlib1g-dev; \
+    rm -rf /var/lib/apt/lists/*
 
-RUN mkdir /opt/trtis-kaldi && mkdir -p /workspace/model-repo/kaldi_online/1 && mkdir -p /mnt/model-repo
-# Copying static files
-COPY scripts /workspace/scripts
+# Add Kaldi dependency
+COPY --from=kaldi /opt/kaldi /opt/kaldi
+
+# Set up Atlas
+RUN set -eux; \
+    ln -sf /usr/include/x86_64-linux-gnu/atlas /usr/local/include/atlas; \
+    ln -sf /usr/include/x86_64-linux-gnu/cblas.h /usr/local/include/cblas.h; \
+    ln -sf /usr/include/x86_64-linux-gnu/clapack.h /usr/local/include/clapack.h; \
+    ln -sf /usr/lib/x86_64-linux-gnu/atlas /usr/local/lib/atlas
 
-# Moving Kaldi to container
-COPY --from=kb /opt/kaldi /opt/kaldi
-ENV LD_LIBRARY_PATH /opt/kaldi/src/lib/:$LD_LIBRARY_PATH
 
-# Building the custom backend
-COPY trtis-kaldi-backend /workspace/trtis-kaldi-backend
-#COPY --from=cbe /workspace/install/custom-backend-sdk /workspace/trtis-kaldi-backend/custom-backend-sdk
-RUN cd /workspace/trtis-kaldi-backend && wget https://github.com/NVIDIA/tensorrt-inference-server/releases/download/v1.9.0/v1.9.0_ubuntu1804.custombackend.tar.gz -O custom-backend-sdk.tar.gz && tar -xzf custom-backend-sdk.tar.gz
-RUN cd /workspace/trtis-kaldi-backend/ && make && cp libkaldi-trtisbackend.so /workspace/model-repo/kaldi_online/1/ && cd - && rm -r /workspace/trtis-kaldi-backend
+#
+# Kaldi custom backend build
+#
+FROM builder as backend-build
+
+# Build the custom backend
+COPY kaldi-asr-backend /workspace/triton-kaldi-backend
+RUN set -eux; \
+    cd /workspace/triton-kaldi-backend; \
+    cmake -DCMAKE_BUILD_TYPE=Release -DCMAKE_INSTALL_PREFIX="$(pwd)/install" \
+        -B build .; \
+    cmake --build build --parallel; \
+    cmake --install build
+
 
-COPY scripts/nvidia_kaldi_trtis_entrypoint.sh /opt/trtis-kaldi
+#
+# Final server image
+#
+FROM ${TRITONSERVER_IMAGE}
+ARG PYTHON_VER
+
+# Kaldi dependencies
+RUN set -eux; \
+    apt-get update; \
+    apt-get install -yq --no-install-recommends \
+        automake \
+        autoconf \
+        cmake \
+        flac \
+        gawk \
+        libatlas3-base \
+        libtool \
+        python${PYTHON_VER} \
+        python${PYTHON_VER}-dev \
+        sox \
+        subversion \
+        unzip \
+        bc \
+        libatlas-base-dev \
+        zlib1g-dev; \
+    rm -rf /var/lib/apt/lists/*
+
+# Add Kaldi dependency
+COPY --from=kaldi /opt/kaldi /opt/kaldi
+
+# Add Kaldi custom backend shared library and scripts
+COPY --from=backend-build /workspace/triton-kaldi-backend/install/backends/kaldi/libtriton_kaldi.so /workspace/model-repo/kaldi_online/1/
+COPY scripts /workspace/scripts
 
-ENTRYPOINT ["/opt/trtis-kaldi/nvidia_kaldi_trtis_entrypoint.sh"]
+# Setup entrypoint and environment
+ENV LD_LIBRARY_PATH /opt/kaldi/src/lib/:/opt/tritonserver/lib:$LD_LIBRARY_PATH
+COPY scripts/nvidia_kaldi_triton_entrypoint.sh /opt/triton-kaldi/
+VOLUME /mnt/model-repo
+ENTRYPOINT ["/opt/triton-kaldi/nvidia_kaldi_triton_entrypoint.sh"]
+CMD ["tritonserver", "--model-repo=/workspace/model-repo"]
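
For orientation, a minimal sketch of how the restructured multi-stage server image could be built and launched. The `triton-kaldi-server` tag, the port mappings, and the host `data/` bind mount are illustrative assumptions, not values defined by this commit:

```bash
# Build the server image: the backend-build stage compiles libtriton_kaldi.so,
# and the final stage copies it into /workspace/model-repo/kaldi_online/1/.
docker build -t triton-kaldi-server -f Dockerfile .

# Hypothetical launch: pass the GPUs through and expose Triton's default
# HTTP (8000), gRPC (8001), and metrics (8002) ports.
docker run --rm --gpus all --shm-size=1g \
    -p 8000:8000 -p 8001:8001 -p 8002:8002 \
    -v "$PWD/data:/mnt/model-repo" \
    triton-kaldi-server
```

With no arguments, the new `CMD` starts `tritonserver` against the baked-in `/workspace/model-repo`.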
Kaldi/SpeechRecognition/Dockerfile.client (+86, -13)

@@ -1,4 +1,4 @@
-# Copyright (c) 2019 NVIDIA CORPORATION. All rights reserved.
+# Copyright (c) 2019-2021 NVIDIA CORPORATION. All rights reserved.
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
@@ -11,33 +11,106 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-FROM nvcr.io/nvidia/kaldi:20.03-py3 as kb
-FROM nvcr.io/nvidia/tritonserver:20.03-py3-clientsdk
+ARG TRITONSERVER_IMAGE=nvcr.io/nvidia/tritonserver:21.05-py3
+ARG KALDI_IMAGE=nvcr.io/nvidia/kaldi:21.08-py3
+ARG PYTHON_VER=3.8
+
+
+#
+# Kaldi shared library dependencies
+#
+FROM ${KALDI_IMAGE} as kaldi
+
+
+#
+# Builder image based on Triton Server SDK image
+#
+FROM ${TRITONSERVER_IMAGE}-sdk as builder
+ARG PYTHON_VER
+
+# Kaldi dependencies
+RUN set -eux; \
+    apt-get update; \
+    apt-get install -yq --no-install-recommends \
+        automake \
+        autoconf \
+        cmake \
+        flac \
+        gawk \
+        libatlas3-base \
+        libtool \
+        python${PYTHON_VER} \
+        python${PYTHON_VER}-dev \
+        sox \
+        subversion \
+        unzip \
+        bc \
+        libatlas-base-dev \
+        gfortran \
+        zlib1g-dev; \
+    rm -rf /var/lib/apt/lists/*
+
+# Add Kaldi dependency
+COPY --from=kaldi /opt/kaldi /opt/kaldi
+
+# Set up Atlas
+RUN set -eux; \
+    ln -sf /usr/include/x86_64-linux-gnu/atlas /usr/local/include/atlas; \
+    ln -sf /usr/include/x86_64-linux-gnu/cblas.h /usr/local/include/cblas.h; \
+    ln -sf /usr/include/x86_64-linux-gnu/clapack.h /usr/local/include/clapack.h; \
+    ln -sf /usr/lib/x86_64-linux-gnu/atlas /usr/local/lib/atlas
+
+
+#
+# Triton Kaldi client build
+#
+FROM builder as client-build
+
+# Build the clients
+COPY kaldi-asr-client /workspace/triton-client
+RUN set -eux; \
+    cd /workspace; \
+    echo 'add_subdirectory(../../../triton-client src/c++/triton-client)' \
+        >> /workspace/client/src/c++/CMakeLists.txt; \
+    cmake -DCMAKE_BUILD_TYPE=Release -B build client; \
+    cmake --build build --parallel --target cc-clients
+
+
+#
+# Final gRPC client image
+#
+FROM ${TRITONSERVER_IMAGE}
+ARG PYTHON_VER
 
 # Kaldi dependencies
-RUN apt-get update && apt-get install -y --no-install-recommends \
+RUN set -eux; \
+    apt-get update; \
+    apt-get install -yq --no-install-recommends \
        automake \
        autoconf \
        cmake \
        flac \
        gawk \
        libatlas3-base \
        libtool \
-       python3.6 \
-       python3.6-dev \
+       python${PYTHON_VER} \
+       python${PYTHON_VER}-dev \
        sox \
        subversion \
        unzip \
        bc \
        libatlas-base-dev \
-       zlib1g-dev
+       zlib1g-dev; \
+    rm -rf /var/lib/apt/lists/*
 
-# Moving Kaldi to container
-COPY --from=kb /opt/kaldi /opt/kaldi
-ENV LD_LIBRARY_PATH /opt/kaldi/src/lib/:$LD_LIBRARY_PATH
+# Add Kaldi dependency
+COPY --from=kaldi /opt/kaldi /opt/kaldi
 
+# Add Triton clients and scripts
+COPY --from=client-build /workspace/build/cc-clients/src/c++/triton-client/kaldi-asr-parallel-client /usr/local/bin/
 COPY scripts /workspace/scripts
 
-COPY kaldi-asr-client /workspace/src/clients/c++/kaldi-asr-client
-RUN echo "add_subdirectory(kaldi-asr-client)" >> "/workspace/src/clients/c++/CMakeLists.txt"
-RUN cd /workspace/build/ && make -j16 trtis-clients
+# Setup environment and entrypoint
+ENV LD_LIBRARY_PATH /opt/kaldi/src/lib/:/opt/tritonserver/lib:$LD_LIBRARY_PATH
+VOLUME /mnt/model-repo
+ENTRYPOINT ["/usr/local/bin/kaldi-asr-parallel-client"]
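
Since the entrypoint is now the client binary itself, arguments appended to `docker run` are passed straight to `kaldi-asr-parallel-client`. A hedged sketch, assuming the image was tagged `triton-kaldi-client` and using only the flags documented in the README below:

```bash
docker build -t triton-kaldi-client -f Dockerfile.client .

# Hypothetical invocation: stream audio at realtime speed (-o) to a local
# server's gRPC endpoint (-u), printing final (-p) and partial (-b) outputs.
docker run --rm --net=host triton-kaldi-client \
    -u localhost:8001 -o -p -b
```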

Kaldi/SpeechRecognition/Dockerfile.notebook (+1, -1)

@@ -11,7 +11,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-FROM nvcr.io/nvidia/tritonserver:20.03-py3-clientsdk
+FROM nvcr.io/nvidia/tritonserver:21.05-py3-sdk
 
 # Kaldi dependencies
 RUN apt-get update && apt-get install -y jupyter \

Kaldi/SpeechRecognition/README.md (+26, -28)

@@ -46,15 +46,18 @@ A reference model is used by all test scripts and benchmarks presented in this r
 Details about parameters can be found in the [Parameters](#parameters) section.
 
 * `model path`: Configured to use the pretrained LibriSpeech model.
+* `use_tensor_cores`: 1
+* `main_q_capacity`: 30000
+* `aux_q_capacity`: 400000
 * `beam`: 10
+* `num_channels`: 4000
 * `lattice_beam`: 7
 * `max_active`: 10,000
 * `frame_subsampling_factor`: 3
 * `acoustic_scale`: 1.0
-* `num_worker_threads`: 20
-* `max_execution_batch_size`: 256
-* `max_batch_size`: 4096
-* `instance_group.count`: 2
+* `num_worker_threads`: 40
+* `max_batch_size`: 400
+* `instance_group.count`: 1
 
 ## Setup
 
@@ -134,9 +137,8 @@ The model configuration parameters are passed to the model and have an impact o
 
 The inference engine configuration parameters configure the inference engine. They impact performance, but not accuracy.
 
-* `max_batch_size`: The maximum number of inference channels opened at a given time. If set to `4096`, then one instance will handle at most 4096 concurrent users.
+* `max_batch_size`: The size of one execution batch on the GPU. This parameter should be set as large as necessary to saturate the GPU, but not bigger. Larger batches will lead to a higher throughput, smaller batches to lower latency.
 * `num_worker_threads`: The number of CPU threads for the postprocessing CPU tasks, such as lattice determinization and text generation from the lattice.
-* `max_execution_batch_size`: The size of one execution batch on the GPU. This parameter should be set as large as necessary to saturate the GPU, but not bigger. Larger batches will lead to a higher throughput, smaller batches to lower latency.
 * `input.WAV_DATA.dims`: The maximum number of samples per chunk. The value must be a multiple of `frame_subsampling_factor * chunks_per_frame`.
 
 ### Inference process
@@ -156,7 +158,7 @@ The client can be configured through a set of parameters that define its behavio
 -u <URL for inference service and its gRPC port>
 -o : Only feed each channel at realtime speed. Simulates online clients.
 -p : Print text outputs
-
+-b : Print partial (best path) text outputs
 ```
 
 ### Input/Output
@@ -187,13 +189,8 @@ Even if only the best path is used, we are still generating a full lattice for b
 
 Support for Kaldi ASR models that are different from the provided LibriSpeech model is experimental. However, it is possible to modify the [Model Path](#model-path) section of the config file `model-repo/kaldi_online/config.pbtxt` to set up your own model.
 
-The models and Kaldi allocators are currently not shared between instances. This means that if your model is large, you may end up with not enough memory on the GPU to store two different instances. If that's the case,
-you can set `count` to `1` in the [`instance_group` section](https://docs.nvidia.com/deeplearning/sdk/tensorrt-inference-server-guide/docs/model_configuration.html#instance-groups) of the config file.
-
 ## Performance
 
-The performance measurements in this document were conducted at the time of publication and may not reflect the performance achieved from NVIDIA’s latest software release. For the most up-to-date performance measurements, go to [NVIDIA Data Center Deep Learning Product Performance](https://developer.nvidia.com/deep-learning-performance-training-inference).
-
 
 ### Metrics
 
@@ -207,8 +204,7 @@ Latency is defined as the delay between the availability of the last chunk of au
 4. *Server:* Compute inference of last chunk
 5. *Server:* Generate the raw lattice for the full utterance
 6. *Server:* Determinize the raw lattice
-7. *Server:* Generate the text output associated with the best path in the determinized lattice
-8. *Client:* Receive text output
+8. *Client:* Receive lattice output
 9. *Client:* Call callback with output
 10. ***t1** <- Current time*
 
@@ -219,20 +215,18 @@ The latency is defined such as `latency = t1 - t0`.
 Our results were obtained by:
 
 1. Building and starting the server as described in [Quick Start Guide](#quick-start-guide).
-2. Running `scripts/run_inference_all_v100.sh` and `scripts/run_inference_all_t4.sh`
-
-
-| GPU  | Realtime I/O | Number of parallel audio channels | Throughput (RTFX) | Latency |       |       |       |
-| ---- | ------------ | --------------------------------- | ----------------- | ------- | ----- | ----- | ----- |
-|      |              |                                   |                   | 90%     | 95%   | 99%   | Avg   |
-| V100 | No           | 2000                              | 1506.5            | N/A     | N/A   | N/A   | N/A   |
-| V100 | Yes          | 1500                              | 1243.2            | 0.582   | 0.699 | 1.04  | 0.400 |
-| V100 | Yes          | 1000                              | 884.1             | 0.379   | 0.393 | 0.788 | 0.333 |
-| V100 | Yes          | 800                               | 660.2             | 0.334   | 0.340 | 0.438 | 0.288 |
-| T4   | No           | 1000                              | 675.2             | N/A     | N/A   | N/A   | N/A   |
-| T4   | Yes          | 700                               | 629.2             | 0.945   | 1.08  | 1.27  | 0.645 |
-| T4   | Yes          | 400                               | 373.7             | 0.579   | 0.624 | 0.758 | 0.452 |
-
+2. Running `scripts/run_inference_all_a100.sh`, `scripts/run_inference_all_v100.sh` and `scripts/run_inference_all_t4.sh`
+
+
+| GPU  | Realtime I/O | Number of parallel audio channels | Latency (s) |      |      |      |
+| ---- | ------------ | --------------------------------- | ----------- | ---- | ---- | ---- |
+|      |              |                                   | 90%         | 95%  | 99%  | Avg  |
+| A100 | Yes          | 2000                              | 0.11        | 0.12 | 0.14 | 0.09 |
+| V100 | Yes          | 2000                              | 0.42        | 0.50 | 0.61 | 0.23 |
+| V100 | Yes          | 1000                              | 0.09        | 0.09 | 0.11 | 0.07 |
+| T4   | Yes          | 600                               | 0.17        | 0.18 | 0.22 | 0.14 |
+| T4   | Yes          | 400                               | 0.12        | 0.13 | 0.15 | 0.10 |
+
 ## Release notes
 
 ### Changelog
@@ -244,5 +238,9 @@ April 2020
 * Printing WER accuracy in Triton client
 * Using the latest Kaldi GPU ASR pipeline, extended support for features (ivectors, fbanks)
 
+July 2021
+* Significantly improve latency and throughput for the backend
+* Update Triton to v2.10.0
+
 ### Known issues
 * No multi-gpu support for the Triton integration
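
To tie the README's parameter list back to the model configuration it describes, here is an illustrative fragment of what `model-repo/kaldi_online/config.pbtxt` might contain with the updated values. The layout follows Triton's model-configuration text format, but the commit does not show the shipped file, so treat every line as an assumption:

```protobuf
# Hypothetical config.pbtxt fragment -- for illustration only.
name: "kaldi_online"
backend: "kaldi"
max_batch_size: 400

instance_group [
  {
    count: 1
    kind: KIND_GPU
  }
]

parameters { key: "beam" value: { string_value: "10" } }
parameters { key: "lattice_beam" value: { string_value: "7" } }
parameters { key: "max_active" value: { string_value: "10000" } }
parameters { key: "frame_subsampling_factor" value: { string_value: "3" } }
parameters { key: "acoustic_scale" value: { string_value: "1.0" } }
parameters { key: "num_worker_threads" value: { string_value: "40" } }
parameters { key: "num_channels" value: { string_value: "4000" } }
parameters { key: "use_tensor_cores" value: { string_value: "1" } }
parameters { key: "main_q_capacity" value: { string_value: "30000" } }
parameters { key: "aux_q_capacity" value: { string_value: "400000" } }
```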

Kaldi/SpeechRecognition/data/README.md

Whitespace-only changes.
