@@ -436,6 +436,75 @@ TEST_F(QnnHTPBackendTests, CompileApi_FromSessionOptions_OutputModelBuffer) {
436
436
437
437
// Check that the compiled model has the expected number of EPContext nodes.
438
438
CheckEpContextNodeCounts (output_model_buffer, output_model_buffer_size, 2 , 2 );
439
+ allocator.Free (output_model_buffer);
440
+ }
441
+
442
+ // Test using the CompileModel() API with settings:
443
+ // - input model from buffer
444
+ // - save output model to buffer
445
+ // - test enabling AND disabling embed mode for context binary in EPContext node attributes
446
+ TEST_F (QnnHTPBackendTests, CompileApi_FromSessionOptions_InputAndOutputModelsInBuffers) {
447
+ // Create a test model and serialize it to a buffer.
448
+ TestModel test_model;
449
+ CreateTestModel (BuildGraphWithQAndNonQ (false ), 21 , logging::Severity::kERROR , test_model);
450
+ std::string model_data = test_model.Serialize ();
451
+
452
+ // Initialize session options with QNN EP
453
+ Ort::SessionOptions session_options;
454
+ ProviderOptions provider_options;
455
+ provider_options[" backend_type" ] = " htp" ;
456
+ provider_options[" offload_graph_io_quantization" ] = " 0" ;
457
+ session_options.AppendExecutionProvider (" QNN" , provider_options);
458
+
459
+ Ort::AllocatorWithDefaultOptions allocator;
460
+
461
+ // Test embed mode enabled.
462
+ {
463
+ void * output_model_buffer = nullptr ;
464
+ size_t output_model_buffer_size = 0 ;
465
+
466
+ // Create model compilation options from the session options.
467
+ Ort::ModelCompilationOptions compile_options (*ort_env, session_options);
468
+ compile_options.SetInputModelFromBuffer (reinterpret_cast <const void *>(model_data.data ()), model_data.size ());
469
+ compile_options.SetOutputModelBuffer (allocator, &output_model_buffer, &output_model_buffer_size);
470
+ compile_options.SetEpContextEmbedMode (true );
471
+
472
+ // Compile the model.
473
+ Ort::Status status = Ort::CompileModel (*ort_env, compile_options);
474
+ ASSERT_TRUE (status.IsOK ()) << status.GetErrorMessage ();
475
+
476
+ // Make sure the compiled model was saved to the buffer.
477
+ ASSERT_TRUE (output_model_buffer != nullptr );
478
+ ASSERT_TRUE (output_model_buffer_size > 0 );
479
+
480
+ // Check that the compiled model has the expected number of EPContext nodes.
481
+ CheckEpContextNodeCounts (output_model_buffer, output_model_buffer_size, 2 , 2 );
482
+ allocator.Free (output_model_buffer);
483
+ }
484
+
485
+ // Test embed mode disabled.
486
+ {
487
+ void * output_model_buffer = nullptr ;
488
+ size_t output_model_buffer_size = 0 ;
489
+
490
+ // Create model compilation options from the session options.
491
+ Ort::ModelCompilationOptions compile_options (*ort_env, session_options);
492
+ compile_options.SetInputModelFromBuffer (reinterpret_cast <const void *>(model_data.data ()), model_data.size ());
493
+ compile_options.SetOutputModelBuffer (allocator, &output_model_buffer, &output_model_buffer_size);
494
+ compile_options.SetEpContextEmbedMode (false );
495
+
496
+ // Compile the model.
497
+ Ort::Status status = Ort::CompileModel (*ort_env, compile_options);
498
+ ASSERT_TRUE (status.IsOK ()) << status.GetErrorMessage ();
499
+
500
+ // Make sure the compiled model was saved to the buffer.
501
+ ASSERT_TRUE (output_model_buffer != nullptr );
502
+ ASSERT_TRUE (output_model_buffer_size > 0 );
503
+
504
+ // Check that the compiled model has the expected number of EPContext nodes.
505
+ CheckEpContextNodeCounts (output_model_buffer, output_model_buffer_size, 2 , 2 );
506
+ allocator.Free (output_model_buffer);
507
+ }
439
508
}
440
509
441
510
// Test using the CompileModel() API with settings:
@@ -485,6 +554,7 @@ TEST_F(QnnHTPBackendTests, CompileApi_FromSessionOptions_OutputModelBuffer_Outpu
485
554
486
555
// Check that the compiled model has the expected number of EPContext nodes.
487
556
CheckEpContextNodeCounts (output_model_buffer, output_model_buffer_size, 2 , 2 );
557
+ allocator.Free (output_model_buffer);
488
558
}
489
559
490
560
// Test that models with 1 non-quantized FusedMatMul node and 1 quantized Add node can still generate the context binary
@@ -1566,7 +1636,7 @@ TEST_F(QnnHTPBackendTests, LoadFromArrayWithQnnEpContextGenPathValidation) {
1566
1636
ORT_CATCH (const std::exception & e) {
1567
1637
ORT_HANDLE_EXCEPTION ([&e]() {
1568
1638
std::string e_message1 (std::string (e.what ()));
1569
- ASSERT_TRUE (e_message1.find (" Please specify a valid ep.context_file_path. " ) != std::string::npos);
1639
+ ASSERT_TRUE (e_message1.find (" Please specify a valid ep.context_file_path" ) != std::string::npos);
1570
1640
});
1571
1641
}
1572
1642
@@ -1577,7 +1647,7 @@ TEST_F(QnnHTPBackendTests, LoadFromArrayWithQnnEpContextGenPathValidation) {
1577
1647
ORT_CATCH (const std::exception & ex) {
1578
1648
ORT_HANDLE_EXCEPTION ([&ex]() {
1579
1649
std::string e_message2 (std::string (ex.what ()));
1580
- ASSERT_TRUE (e_message2.find (" Please specify a valid ep.context_file_path. " ) != std::string::npos);
1650
+ ASSERT_TRUE (e_message2.find (" Please specify a valid ep.context_file_path" ) != std::string::npos);
1581
1651
});
1582
1652
}
1583
1653
}
0 commit comments