
Commit 2abebb2

[TensorRT EP] No workspace size limit to TRT memory pool (microsoft#21643)
Some models failed to run due to OOM, and increasing trt_max_workspace_size fixed them. This PR removes the size limit by default (allowing up to the maximum device memory), which aligns with trtexec.
1 parent eeef0c8 commit 2abebb2
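The new default can still be overridden by callers who want a hard cap. Below is a minimal sketch, not part of this diff, showing how the previous 1 GB limit could be restored through the string-based V2 provider options. The helper name ConfigureTrtWorkspace is hypothetical; the OrtApi entry points used are the standard ones for TensorRT V2 options (verify availability against your onnxruntime version).

#include <onnxruntime_cxx_api.h>

// Hypothetical helper, shown for illustration only: restores the pre-change
// 1 GB workspace cap via the string-based TensorRT V2 provider options.
void ConfigureTrtWorkspace(Ort::SessionOptions& session_options) {
  const OrtApi& api = Ort::GetApi();
  OrtTensorRTProviderOptionsV2* trt_options = nullptr;
  Ort::ThrowOnError(api.CreateTensorRTProviderOptions(&trt_options));

  // "0" (the new default) means no pool limit, so TensorRT may use up to the
  // maximum device memory; any positive byte count reinstates a hard cap.
  const char* keys[] = {"trt_max_workspace_size"};
  const char* values[] = {"1073741824"};  // 1 GB, the previous default
  Ort::ThrowOnError(api.UpdateTensorRTProviderOptions(trt_options, keys, values, 1));

  Ort::ThrowOnError(api.SessionOptionsAppendExecutionProvider_TensorRT_V2(
      session_options, trt_options));
  api.ReleaseTensorRTProviderOptions(trt_options);
}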

4 files changed, +10 -11 lines changed
include/onnxruntime/core/providers/tensorrt/tensorrt_provider_options.h

Lines changed: 1 addition & 1 deletion
@@ -19,7 +19,7 @@ struct OrtTensorRTProviderOptionsV2 {
   // can be updated using: UpdateTensorRTProviderOptionsWithValue
   int trt_max_partition_iterations{1000};                // maximum iterations for TensorRT parser to get capability
   int trt_min_subgraph_size{1};                          // minimum size of TensorRT subgraphs
-  size_t trt_max_workspace_size{1 << 30};                // maximum workspace size for TensorRT.
+  size_t trt_max_workspace_size{0};                      // maximum workspace size for TensorRT. Default is 0 means max device memory size
   int trt_fp16_enable{0};                                // enable TensorRT FP16 precision. Default 0 = false, nonzero = true
   int trt_int8_enable{0};                                // enable TensorRT INT8 precision. Default 0 = false, nonzero = true
   const char* trt_int8_calibration_table_name{nullptr};  // TensorRT INT8 calibration table name.
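The comment kept at the top of this hunk points to UpdateTensorRTProviderOptionsWithValue as the typed update path. A hedged sketch of that call, reusing the trt_options pointer from the example above (the entry point takes a void* value, so trt_max_workspace_size is passed as a pointer to size_t; availability depends on the onnxruntime version):

size_t workspace_bytes = 0;  // 0 = no limit, i.e. up to max device memory
Ort::ThrowOnError(Ort::GetApi().UpdateTensorRTProviderOptionsWithValue(
    trt_options, "trt_max_workspace_size", &workspace_bytes));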

onnxruntime/core/providers/tensorrt/tensorrt_execution_provider.cc

Lines changed: 7 additions & 7 deletions
@@ -1583,10 +1583,6 @@ TensorrtExecutionProvider::TensorrtExecutionProvider(const TensorrtExecutionProv
     LOGS_DEFAULT(WARNING) << "[TensorRT EP] TensorRT option trt_min_subgraph_size must be a positive integer value. Set it to 1";
     min_subgraph_size_ = 1;
   }
-  if (max_workspace_size_ <= 0) {
-    LOGS_DEFAULT(WARNING) << "[TensorRT EP] TensorRT option trt_max_workspace_size must be a positive integer value. Set it to 1073741824 (1GB)";
-    max_workspace_size_ = 1 << 30;
-  }
   if (dla_core_ < 0) {
     LOGS_DEFAULT(WARNING) << "[TensorRT EP] TensorRT option trt_dla_core must be a non-negative integer value. Set it to 0";
     dla_core_ = 0;
@@ -2756,7 +2752,9 @@ Status TensorrtExecutionProvider::CreateNodeComputeInfoFromGraph(const GraphView
   auto trt_config = std::unique_ptr<nvinfer1::IBuilderConfig>(trt_builder->createBuilderConfig());
   auto trt_parser = tensorrt_ptr::unique_pointer<nvonnxparser::IParser>(nvonnxparser::createParser(*trt_network, trt_logger));
   trt_parser->parse(string_buf.data(), string_buf.size(), model_path_);
-  trt_config->setMemoryPoolLimit(nvinfer1::MemoryPoolType::kWORKSPACE, max_workspace_size_);
+  if (max_workspace_size_ > 0) {
+    trt_config->setMemoryPoolLimit(nvinfer1::MemoryPoolType::kWORKSPACE, max_workspace_size_);
+  }

   // Force Pow + Reduce ops in layer norm to run in FP32 to avoid overflow
   if (fp16_enable_ && layer_norm_fp32_fallback_) {
@@ -3363,7 +3361,7 @@ Status TensorrtExecutionProvider::CreateNodeComputeInfoFromGraph(const GraphView
       &parsers_[context->node_name], &engines_[context->node_name], &contexts_[context->node_name],
       &networks_[context->node_name], input_info_[context->node_name], output_info_[context->node_name],
       input_shape_ranges_[context->node_name], &tensorrt_mu_, fp16_enable_, int8_enable_, int8_calibration_cache_available_,
-      dla_enable_, dla_core_, &max_workspace_size_, trt_node_name_with_precision,
+      dla_enable_, dla_core_, trt_node_name_with_precision,
       engine_cache_enable_, cache_path_, runtime_.get(), profiles_[context->node_name],
       context_memory_sharing_enable_, &max_ctx_mem_size_, dynamic_range_map, engine_decryption_enable_,
       engine_decryption_, engine_encryption_, timing_cache_enable_, global_cache_path_, force_timing_cache_match_,
@@ -3538,7 +3536,9 @@ Status TensorrtExecutionProvider::CreateNodeComputeInfoFromGraph(const GraphView
       trt_state->context->reset();
       trt_state->engine->reset();
       auto trt_config = std::unique_ptr<nvinfer1::IBuilderConfig>(trt_builder->createBuilderConfig());
-      trt_config->setMemoryPoolLimit(nvinfer1::MemoryPoolType::kWORKSPACE, *(trt_state->max_workspace_size_ptr));
+      if (max_workspace_size_ > 0) {
+        trt_config->setMemoryPoolLimit(nvinfer1::MemoryPoolType::kWORKSPACE, max_workspace_size_);
+      }
       for (auto trt_profile : trt_profiles) {
         trt_config->addOptimizationProfile(trt_profile);
       }
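For context on why dropping the unconditional setMemoryPoolLimit call is safe: in TensorRT 8.4 and later, the kWORKSPACE pool defaults to the device's total global memory when no limit is set, which is the behavior trtexec exhibits without an explicit pool size. A standalone sketch of the guarded pattern above, under that assumption:

#include <NvInfer.h>

// Mirrors the guarded calls in this diff: only cap the workspace pool when the
// user supplied a positive byte count; otherwise leave TensorRT's default
// (up to the maximum device memory) in place.
void ConfigureWorkspace(nvinfer1::IBuilderConfig& config, size_t max_workspace_size) {
  if (max_workspace_size > 0) {
    config.setMemoryPoolLimit(nvinfer1::MemoryPoolType::kWORKSPACE, max_workspace_size);
  }
}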

onnxruntime/core/providers/tensorrt/tensorrt_execution_provider.h

Lines changed: 1 addition & 2 deletions
@@ -175,7 +175,6 @@ struct TensorrtFuncState {
   bool int8_calibration_cache_available = false;
   bool dla_enable = false;
   int dla_core = 0;
-  size_t* max_workspace_size_ptr = nullptr;
   std::string trt_node_name_with_precision;
   bool engine_cache_enable = false;
   std::string engine_cache_path;
@@ -290,7 +289,7 @@ class TensorrtExecutionProvider : public IExecutionProvider {
   cudaStream_t stream_ = nullptr;
   int max_partition_iterations_ = 1000;
   size_t min_subgraph_size_ = 1;
-  size_t max_workspace_size_ = 1 << 30;  // 1GB
+  size_t max_workspace_size_ = 0;
   bool fp16_enable_ = false;
   bool int8_enable_ = false;
   bool dla_enable_ = false;

onnxruntime/core/providers/tensorrt/tensorrt_execution_provider_info.h

Lines changed: 1 addition & 1 deletion
@@ -22,7 +22,7 @@ struct TensorrtExecutionProviderInfo {
   bool has_trt_options{false};
   int max_partition_iterations{1000};
   int min_subgraph_size{1};
-  size_t max_workspace_size{1 << 30};
+  size_t max_workspace_size{0};
   bool fp16_enable{false};
   bool int8_enable{false};
   std::string int8_calibration_table_name{""};
