@@ -40,6 +40,9 @@ use std::sync::mpsc::{RecvTimeoutError, TryRecvError};
40
40
use std:: sync:: Mutex ;
41
41
use std:: time:: Duration ;
42
42
43
+ use lazy_static:: lazy_static; // for CLEANUP_LEVEL arg to be globally visible
44
+ use std:: sync:: RwLock ;
45
+
43
46
#[ cfg( target_arch = "x86_64" ) ]
44
47
use crate :: device_manager:: legacy:: PortIODeviceManager ;
45
48
use crate :: device_manager:: mmio:: MMIODeviceManager ;
@@ -93,6 +96,62 @@ pub const FC_EXIT_CODE_BAD_CONFIGURATION: u8 = 152;
93
96
/// Command line arguments parsing error.
94
97
pub const FC_EXIT_CODE_ARG_PARSING : u8 = 153 ;
95
98
99
/// Possible errors that could be encountered while processing a cleanup level value.
///
/// The `Display` output is intended to be shown to the user who supplied the value.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum CleanupLevelError {
    /// Failed to parse to `u8`.
    Parse(std::num::ParseIntError),
    /// Cleanup level is a `u8` value, other than 0, 1 or 2.
    Level(u8),
}

impl Display for CleanupLevelError {
    /// Writes a human-readable description of the error into `f`.
    fn fmt(&self, f: &mut Formatter) -> std::fmt::Result {
        match self {
            // The underlying integer-parsing error is appended verbatim.
            CleanupLevelError::Parse(err) => write!(f, "Could not parse to 'u8': {}", err),
            // The value parsed as a `u8`, but is not one of the accepted levels.
            CleanupLevelError::Level(arg) => write!(
                f,
                "'{}' isn't a valid value for 'cleanup-level'. Must be 0, 1 or 2.",
                arg
            ),
        }
    }
}

// Lets the error be used wherever a standard error is expected (e.g. `Box<dyn Error>`).
// `source()` is intentionally left at its default (`None`): the parse error's message is
// already included in the `Display` output above, so chaining it would report it twice.
impl std::error::Error for CleanupLevelError {}
122
+
123
+ /// Possible values for cleanup level.
124
+ #[ repr( u8 ) ]
125
+ #[ derive( Clone , Copy , Debug , PartialEq ) ]
126
+ pub enum CleanupLevel {
127
+ /// Abruptly call std::exit, so even successful shutdown is like a crash (fastest)
128
+ Abrupt = 0 ,
129
+ /// Normal shutdown, but don't worry about freeing memory or joining threads (let OS do it)
130
+ Default = 1 ,
131
+ /// Make sure all threads join and exit code bubbles up to main, for sanitizer accounting.
132
+ Valgrind = 2 ,
133
+ }
134
+
135
+ impl CleanupLevel {
136
+ /// Converts from a cleanup level value of type String to the corresponding CleanupLevel variant
137
+ /// or returns an error if the parsing failed.
138
+ pub fn from_string ( cleanup_value : & str ) -> std:: result:: Result < Self , CleanupLevelError > {
139
+ match cleanup_value. parse :: < u8 > ( ) {
140
+ Ok ( 0 ) => Ok ( CleanupLevel :: Abrupt ) ,
141
+ Ok ( 1 ) => Ok ( CleanupLevel :: Default ) ,
142
+ Ok ( 2 ) => Ok ( CleanupLevel :: Valgrind ) ,
143
+ Ok ( level) => Err ( CleanupLevelError :: Level ( level) ) ,
144
+ Err ( err) => Err ( CleanupLevelError :: Parse ( err) ) ,
145
+ }
146
+ }
147
+ }
148
+
149
+ lazy_static ! {
150
+ /// Static instance for conveying the command-line `--cleanup-level` setting to the VMM.
151
+ pub static ref CLEANUP_LEVEL : RwLock <CleanupLevel > = RwLock :: new( CleanupLevel :: Default ) ;
152
+ }
153
+
154
+
96
155
/// Errors associated with the VMM internal logic. These errors cannot be generated by direct user
97
156
/// input, but can result from bad configuration of the host (for example if Firecracker doesn't
98
157
/// have permissions to open the KVM fd).
@@ -353,12 +412,23 @@ impl Vmm {
353
412
. map_err ( Error :: I8042Error )
354
413
}
355
414
356
- /// Waits for all vCPUs to exit. Does not terminate the Firecracker process.
357
- /// (See notes in main() about why ExitCode is bubbled up for clean shutdown.)
358
- pub fn stop ( & mut self ) {
415
+ /// This stops the VMM. Based on the setting of `--cleanup-level`, it may also exit the
416
+ /// Firecracker process entirely. It does so by default (cleanup-level of 1), but for
417
+ /// sanity checking and Valgrind use it's important to offer a higher level of cleanliness
418
+ /// which can gracefully exit all threads and bubble the exit_code up to main().
419
+ ///
420
+ pub fn stop ( & mut self , exit_code : ExitCode ) -> ExitCode {
359
421
info ! ( "Vmm is stopping." ) ;
360
422
361
- self . exit_vcpus ( ) . unwrap ( ) ; // exit all not-already-exited VCPUs, join their threads
423
+ let cleanup_level = * CLEANUP_LEVEL . read ( ) . unwrap ( ) ;
424
+
425
+ if cleanup_level == CleanupLevel :: Abrupt {
426
+ //
427
+ // Most severe form of exiting (also the fastest), does not run any Rust shutdown
428
+ // that happens with `std::process::exit`. Similar to crashing.
429
+ //
430
+ unsafe { libc:: _exit ( i32:: from ( exit_code) ) ; }
431
+ }
362
432
363
433
if let Some ( observer) = self . events_observer . as_mut ( ) {
364
434
if let Err ( e) = observer. on_vmm_stop ( ) {
@@ -370,6 +440,23 @@ impl Vmm {
370
440
if let Err ( e) = METRICS . write ( ) {
371
441
error ! ( "Failed to write metrics while stopping: {}" , e) ;
372
442
}
443
+
444
+ if cleanup_level == CleanupLevel :: Default {
445
+ //
446
+ // This runs as much shutdown code as Firecracker had before the inclusion of the
447
+ // ability to do CleanupLevel::Valgrind.
448
+ //
449
+ // !!! This preserves the usage of libc::_exit() vs. std::process::exit(), just to
450
+ // keep it the same. But was that initial choice intentional?
451
+ //
452
+ unsafe { libc:: _exit ( i32:: from ( exit_code) ) ; }
453
+ }
454
+
455
+ assert ! ( cleanup_level == CleanupLevel :: Valgrind ) ; // bubble up exit_code to main()
456
+
457
+ self . exit_vcpus ( ) . unwrap ( ) ; // exit all not-already-exited VCPUs, join their threads
458
+
459
+ exit_code
373
460
}
374
461
375
462
/// Saves the state of a paused Microvm.
@@ -777,6 +864,12 @@ impl Subscriber for Vmm {
777
864
}
778
865
}
779
866
867
+ // If the exit_code can't be found on any vcpu, it means that the exit signal
868
+ // has been issued by the i8042 controller in which case we exit with
869
+ // FC_EXIT_CODE_OK.
870
+ //
871
+ let exit_code = opt_exit_code. unwrap_or ( FC_EXIT_CODE_OK ) ;
872
+
780
873
// !!! The caller of this routine is receiving the exit code to bubble back up
781
874
// to the main() function to return cleanly. However, it does not have clean
782
875
// access to the Vmm to shut it down (here we have it, since it is `self`). It
@@ -787,13 +880,7 @@ impl Subscriber for Vmm {
787
880
// that will actually work with an exit code (all other Subscriber trait
788
881
// implementers must use process())
789
882
//
790
- self . stop ( ) ;
791
-
792
- // If the exit_code can't be found on any vcpu, it means that the exit signal
793
- // has been issued by the i8042 controller in which case we exit with
794
- // FC_EXIT_CODE_OK.
795
- //
796
- Some ( opt_exit_code. unwrap_or ( FC_EXIT_CODE_OK ) )
883
+ Some ( self . stop ( exit_code) ) // may exit abruptly, depending on CLEANUP_LEVEL
797
884
} else {
798
885
error ! ( "Spurious EventManager event for handler: Vmm" ) ;
799
886
None
0 commit comments