2
2
// SPDX-License-Identifier: Apache-2.0
3
3
4
4
use std:: ffi:: { CStr , OsString } ;
5
- use std:: fs:: { self , canonicalize, File , Permissions } ;
5
+ use std:: fs:: { self , canonicalize, File , OpenOptions , Permissions } ;
6
6
use std:: os:: unix:: fs:: PermissionsExt ;
7
7
use std:: os:: unix:: io:: IntoRawFd ;
8
8
use std:: os:: unix:: process:: CommandExt ;
@@ -13,6 +13,8 @@ use crate::cgroup;
13
13
use crate :: cgroup:: Cgroup ;
14
14
use crate :: chroot:: chroot;
15
15
use crate :: { Error , Result } ;
16
+ use std:: io;
17
+ use std:: io:: Write ;
16
18
use utils:: arg_parser:: Error :: MissingValue ;
17
19
use utils:: syscall:: SyscallReturnCode ;
18
20
use utils:: { arg_parser, validators} ;
@@ -44,6 +46,10 @@ const DEV_NULL_WITH_NUL: &[u8] = b"/dev/null\0";
44
46
const FOLDER_HIERARCHY : [ & [ u8 ] ; 4 ] = [ b"/\0 " , b"/dev\0 " , b"/dev/net\0 " , b"/run\0 " ] ;
45
47
const FOLDER_PERMISSIONS : u32 = 0o700 ;
46
48
49
+ // When running with `--new-pid-ns` flag, the PID of the process running the exec_file differs
50
+ // from jailer's and it is stored inside a dedicated file, suffixed with the below extension.
51
+ const PID_FILE_EXTENSION : & str = ".pid" ;
52
+
47
53
// Helper function, since we'll use libc::dup2 a bunch of times for daemonization.
48
54
fn dup2 ( old_fd : libc:: c_int , new_fd : libc:: c_int ) -> Result < ( ) > {
49
55
// This is safe because we are using a library function with valid parameters.
@@ -60,8 +66,10 @@ pub struct Env {
60
66
gid : u32 ,
61
67
netns : Option < String > ,
62
68
daemonize : bool ,
69
+ new_pid_ns : bool ,
63
70
start_time_us : u64 ,
64
71
start_time_cpu_us : u64 ,
72
+ jailer_cpu_time_us : u64 ,
65
73
extra_args : Vec < String > ,
66
74
cgroups : Vec < Cgroup > ,
67
75
}
@@ -125,6 +133,8 @@ impl Env {
125
133
126
134
let daemonize = arguments. flag_present ( "daemonize" ) ;
127
135
136
+ let new_pid_ns = arguments. flag_present ( "new-pid-ns" ) ;
137
+
128
138
// Optional arguments.
129
139
let mut cgroups = Vec :: new ( ) ;
130
140
@@ -165,8 +175,10 @@ impl Env {
165
175
gid,
166
176
netns,
167
177
daemonize,
178
+ new_pid_ns,
168
179
start_time_us,
169
180
start_time_cpu_us,
181
+ jailer_cpu_time_us : 0 ,
170
182
extra_args : arguments. extra_args ( ) ,
171
183
cgroups,
172
184
} )
@@ -184,6 +196,56 @@ impl Env {
184
196
self . uid
185
197
}
186
198
199
+ fn exec_into_new_pid_ns ( & mut self , chroot_exec_file : PathBuf ) -> Result < ( ) > {
200
+ // Unshare into a new PID namespace.
201
+ // The current process will not be moved into the newly created namespace, but its first
202
+ // child will assume the role of init(1) in the new namespace.
203
+ // The call is safe because we're invoking a C library function with valid parameters.
204
+ SyscallReturnCode ( unsafe { libc:: unshare ( libc:: CLONE_NEWPID ) } )
205
+ . into_empty_result ( )
206
+ . map_err ( Error :: UnshareNewPID ) ?;
207
+
208
+ // Compute jailer's total CPU time up to the current time.
209
+ self . jailer_cpu_time_us =
210
+ utils:: time:: get_time_us ( utils:: time:: ClockType :: ProcessCpu ) - self . start_time_cpu_us ;
211
+
212
+ // Duplicate the current process. The child process will belong to the previously created
213
+ // PID namespace.
214
+ // TODO: replace the `unshare()` + `fork()` combo with `clone()` if we ever need to
215
+ // squeeze every bit of start-up latency we can get
216
+ let pid = unsafe { libc:: fork ( ) } ;
217
+ match pid {
218
+ 0 => {
219
+ // Reset process start time.
220
+ self . start_time_cpu_us = 0 ;
221
+
222
+ Err ( Error :: Exec ( self . exec_command ( chroot_exec_file) ) )
223
+ }
224
+ child_pid => {
225
+ // Save the PID of the process running the exec file provided
226
+ // inside <chroot_exec_file>.pid file.
227
+ self . save_exec_file_pid ( child_pid, chroot_exec_file) ?;
228
+ unsafe { libc:: exit ( 0 ) }
229
+ }
230
+ }
231
+ }
232
+
233
+ fn save_exec_file_pid ( & mut self , pid : i32 , chroot_exec_file : PathBuf ) -> Result < ( ) > {
234
+ let chroot_exec_file_str = chroot_exec_file
235
+ . to_str ( )
236
+ . ok_or_else ( || Error :: FileName ( chroot_exec_file. clone ( ) ) ) ?;
237
+ let pid_file_path =
238
+ PathBuf :: from ( format ! ( "{}{}" , chroot_exec_file_str, PID_FILE_EXTENSION ) ) ;
239
+ let mut pid_file = OpenOptions :: new ( )
240
+ . write ( true )
241
+ . create_new ( true )
242
+ . open ( pid_file_path. clone ( ) )
243
+ . map_err ( |e| Error :: FileOpen ( pid_file_path. clone ( ) , e) ) ?;
244
+
245
+ // Write PID to file.
246
+ write ! ( pid_file, "{}" , pid) . map_err ( |e| Error :: Write ( pid_file_path, e) )
247
+ }
248
+
187
249
fn mknod_and_own_dev (
188
250
& self ,
189
251
dev_path_str : & ' static [ u8 ] ,
@@ -278,6 +340,21 @@ impl Env {
278
340
. map_err ( Error :: CloseNetNsFd )
279
341
}
280
342
343
+ fn exec_command ( & self , chroot_exec_file : PathBuf ) -> io:: Error {
344
+ Command :: new ( chroot_exec_file)
345
+ . args ( & [ "--id" , & self . id ] )
346
+ . args ( & [ "--start-time-us" , & self . start_time_us . to_string ( ) ] )
347
+ . args ( & [ "--start-time-cpu-us" , & self . start_time_cpu_us . to_string ( ) ] )
348
+ . args ( & [ "--parent-cpu-time-us" , & self . jailer_cpu_time_us . to_string ( ) ] )
349
+ . stdin ( Stdio :: inherit ( ) )
350
+ . stdout ( Stdio :: inherit ( ) )
351
+ . stderr ( Stdio :: inherit ( ) )
352
+ . uid ( self . uid ( ) )
353
+ . gid ( self . gid ( ) )
354
+ . args ( & self . extra_args )
355
+ . exec ( )
356
+ }
357
+
281
358
#[ cfg( target_arch = "aarch64" ) ]
282
359
fn copy_cache_info ( & self ) -> Result < ( ) > {
283
360
use crate :: { readln_special, to_cstring, writeln_special} ;
@@ -447,19 +524,12 @@ impl Env {
447
524
. map_err ( Error :: CloseDevNullFd ) ?;
448
525
}
449
526
450
- Err ( Error :: Exec (
451
- Command :: new ( chroot_exec_file)
452
- . args ( & [ "--id" , & self . id ] )
453
- . args ( & [ "--start-time-us" , & self . start_time_us . to_string ( ) ] )
454
- . args ( & [ "--start-time-cpu-us" , & self . start_time_cpu_us . to_string ( ) ] )
455
- . stdin ( Stdio :: inherit ( ) )
456
- . stdout ( Stdio :: inherit ( ) )
457
- . stderr ( Stdio :: inherit ( ) )
458
- . uid ( self . uid ( ) )
459
- . gid ( self . gid ( ) )
460
- . args ( self . extra_args )
461
- . exec ( ) ,
462
- ) )
527
+ // If specified, exec the provided binary into a new PID namespace.
528
+ if self . new_pid_ns {
529
+ self . exec_into_new_pid_ns ( chroot_exec_file)
530
+ } else {
531
+ Err ( Error :: Exec ( self . exec_command ( chroot_exec_file) ) )
532
+ }
463
533
}
464
534
}
465
535
@@ -483,6 +553,7 @@ mod tests {
483
553
pub chroot_base : & ' a str ,
484
554
pub netns : Option < & ' a str > ,
485
555
pub daemonize : bool ,
556
+ pub new_pid_ns : bool ,
486
557
pub cgroups : Vec < & ' a str > ,
487
558
}
488
559
@@ -497,6 +568,7 @@ mod tests {
497
568
chroot_base : "/" ,
498
569
netns : Some ( "zzzns" ) ,
499
570
daemonize : true ,
571
+ new_pid_ns : true ,
500
572
cgroups : vec ! [ "cpu.shares=2" , "cpuset.mems=0" ] ,
501
573
}
502
574
}
@@ -537,6 +609,10 @@ mod tests {
537
609
arg_vec. push ( "--daemonize" . to_string ( ) ) ;
538
610
}
539
611
612
+ if arg_vals. new_pid_ns {
613
+ arg_vec. push ( "--new-pid-ns" . to_string ( ) ) ;
614
+ }
615
+
540
616
arg_vec
541
617
}
542
618
@@ -577,10 +653,12 @@ mod tests {
577
653
578
654
assert_eq ! ( good_env. netns, good_arg_vals. netns. map( String :: from) ) ;
579
655
assert ! ( good_env. daemonize) ;
656
+ assert ! ( good_env. new_pid_ns) ;
580
657
581
658
let another_good_arg_vals = ArgVals {
582
659
netns : None ,
583
660
daemonize : false ,
661
+ new_pid_ns : false ,
584
662
..good_arg_vals
585
663
} ;
586
664
@@ -590,6 +668,7 @@ mod tests {
590
668
let another_good_env = Env :: new ( & args, 0 , 0 )
591
669
. expect ( "This another new environment should be created successfully." ) ;
592
670
assert ! ( !another_good_env. daemonize) ;
671
+ assert ! ( !another_good_env. new_pid_ns) ;
593
672
594
673
let base_invalid_arg_vals = ArgVals {
595
674
daemonize : true ,
@@ -796,6 +875,7 @@ mod tests {
796
875
chroot_base : some_dir_path,
797
876
netns : Some ( "zzzns" ) ,
798
877
daemonize : false ,
878
+ new_pid_ns : false ,
799
879
cgroups : Vec :: new ( ) ,
800
880
} ;
801
881
fs:: write ( some_file_path, "some_content" ) . unwrap ( ) ;
@@ -939,4 +1019,19 @@ mod tests {
939
1019
let entries = fs:: read_dir ( & index_dest_path) . unwrap ( ) ;
940
1020
assert_eq ! ( entries. enumerate( ) . count( ) , 6 ) ;
941
1021
}
1022
+
1023
// Checks that `save_exec_file_pid` creates `<exec_file>.pid` containing the PID as text.
#[test]
fn test_save_exec_file_pid() {
    // Work inside the system temp dir with a per-process file name, rather than a fixed
    // `file.pid` in the current working directory.
    let exec_file_path =
        std::env::temp_dir().join(format!("jailer_test_exec_file_{}", std::process::id()));
    let mut pid_file_name = exec_file_path.clone().into_os_string();
    pid_file_name.push(".pid");
    let pid_file_path = PathBuf::from(pid_file_name);
    let pid = 1;

    // The PID file is opened with `create_new(true)`, so a stale file left behind by an
    // aborted run would make the call under test fail. A missing file is the normal case,
    // hence the ignored result.
    let _ = fs::remove_file(&pid_file_path);

    let mut env = create_env();
    env.save_exec_file_pid(pid, exec_file_path).unwrap();

    // Read first, clean up second, assert last: the file is removed even if the
    // assertion fails.
    let stored_pid = fs::read_to_string(&pid_file_path);
    fs::remove_file(&pid_file_path).unwrap();
    assert_eq!(stored_pid.unwrap(), "1");
}
942
1037
}
0 commit comments