Skip to content

Commit 0c24d8d

Browse files
luminitavoicuacatangiu
authored andcommitted
jailer: add flag to spawn FC in a new PID ns
Signed-off-by: Luminita Voicu <[email protected]>
1 parent 664b851 commit 0c24d8d

File tree

5 files changed

+184
-16
lines changed

5 files changed

+184
-16
lines changed

src/jailer/src/env.rs

Lines changed: 109 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
// SPDX-License-Identifier: Apache-2.0
33

44
use std::ffi::{CStr, OsString};
5-
use std::fs::{self, canonicalize, File, Permissions};
5+
use std::fs::{self, canonicalize, File, OpenOptions, Permissions};
66
use std::os::unix::fs::PermissionsExt;
77
use std::os::unix::io::IntoRawFd;
88
use std::os::unix::process::CommandExt;
@@ -13,6 +13,8 @@ use crate::cgroup;
1313
use crate::cgroup::Cgroup;
1414
use crate::chroot::chroot;
1515
use crate::{Error, Result};
16+
use std::io;
17+
use std::io::Write;
1618
use utils::arg_parser::Error::MissingValue;
1719
use utils::syscall::SyscallReturnCode;
1820
use utils::{arg_parser, validators};
@@ -44,6 +46,10 @@ const DEV_NULL_WITH_NUL: &[u8] = b"/dev/null\0";
4446
const FOLDER_HIERARCHY: [&[u8]; 4] = [b"/\0", b"/dev\0", b"/dev/net\0", b"/run\0"];
4547
const FOLDER_PERMISSIONS: u32 = 0o700;
4648

49+
// When running with `--new-pid-ns` flag, the PID of the process running the exec_file differs
50+
// from jailer's and it is stored inside a dedicated file, prefixed with the below extension.
51+
const PID_FILE_EXTENSION: &str = ".pid";
52+
4753
// Helper function, since we'll use libc::dup2 a bunch of times for daemonization.
4854
fn dup2(old_fd: libc::c_int, new_fd: libc::c_int) -> Result<()> {
4955
// This is safe because we are using a library function with valid parameters.
@@ -60,8 +66,10 @@ pub struct Env {
6066
gid: u32,
6167
netns: Option<String>,
6268
daemonize: bool,
69+
new_pid_ns: bool,
6370
start_time_us: u64,
6471
start_time_cpu_us: u64,
72+
jailer_cpu_time_us: u64,
6573
extra_args: Vec<String>,
6674
cgroups: Vec<Cgroup>,
6775
}
@@ -125,6 +133,8 @@ impl Env {
125133

126134
let daemonize = arguments.flag_present("daemonize");
127135

136+
let new_pid_ns = arguments.flag_present("new-pid-ns");
137+
128138
// Optional arguments.
129139
let mut cgroups = Vec::new();
130140

@@ -165,8 +175,10 @@ impl Env {
165175
gid,
166176
netns,
167177
daemonize,
178+
new_pid_ns,
168179
start_time_us,
169180
start_time_cpu_us,
181+
jailer_cpu_time_us: 0,
170182
extra_args: arguments.extra_args(),
171183
cgroups,
172184
})
@@ -184,6 +196,56 @@ impl Env {
184196
self.uid
185197
}
186198

199+
fn exec_into_new_pid_ns(&mut self, chroot_exec_file: PathBuf) -> Result<()> {
200+
// Unshare into a new PID namespace.
201+
// The current process will not be moved into the newly created namespace, but its first
202+
// child will assume the role of init(1) in the new namespace.
203+
// The call is safe because we're invoking a C library function with valid parameters.
204+
SyscallReturnCode(unsafe { libc::unshare(libc::CLONE_NEWPID) })
205+
.into_empty_result()
206+
.map_err(Error::UnshareNewPID)?;
207+
208+
// Compute jailer's total CPU time up to the current time.
209+
self.jailer_cpu_time_us =
210+
utils::time::get_time_us(utils::time::ClockType::ProcessCpu) - self.start_time_cpu_us;
211+
212+
// Duplicate the current process. The child process will belong to the previously created
213+
// PID namespace.
214+
// TODO: replace the `unshare()` + `fork()` combo with `clone()` if we ever need to
215+
// squeeze every bit of start-up latency we can get
216+
let pid = unsafe { libc::fork() };
217+
match pid {
218+
0 => {
219+
// Reset process start time.
220+
self.start_time_cpu_us = 0;
221+
222+
Err(Error::Exec(self.exec_command(chroot_exec_file)))
223+
}
224+
child_pid => {
225+
// Save the PID of the process running the exec file provided
226+
// inside <chroot_exec_file>.pid file.
227+
self.save_exec_file_pid(child_pid, chroot_exec_file)?;
228+
unsafe { libc::exit(0) }
229+
}
230+
}
231+
}
232+
233+
fn save_exec_file_pid(&mut self, pid: i32, chroot_exec_file: PathBuf) -> Result<()> {
234+
let chroot_exec_file_str = chroot_exec_file
235+
.to_str()
236+
.ok_or_else(|| Error::FileName(chroot_exec_file.clone()))?;
237+
let pid_file_path =
238+
PathBuf::from(format!("{}{}", chroot_exec_file_str, PID_FILE_EXTENSION));
239+
let mut pid_file = OpenOptions::new()
240+
.write(true)
241+
.create_new(true)
242+
.open(pid_file_path.clone())
243+
.map_err(|e| Error::FileOpen(pid_file_path.clone(), e))?;
244+
245+
// Write PID to file.
246+
write!(pid_file, "{}", pid).map_err(|e| Error::Write(pid_file_path, e))
247+
}
248+
187249
fn mknod_and_own_dev(
188250
&self,
189251
dev_path_str: &'static [u8],
@@ -278,6 +340,21 @@ impl Env {
278340
.map_err(Error::CloseNetNsFd)
279341
}
280342

343+
fn exec_command(&self, chroot_exec_file: PathBuf) -> io::Error {
344+
Command::new(chroot_exec_file)
345+
.args(&["--id", &self.id])
346+
.args(&["--start-time-us", &self.start_time_us.to_string()])
347+
.args(&["--start-time-cpu-us", &self.start_time_cpu_us.to_string()])
348+
.args(&["--parent-cpu-time-us", &self.jailer_cpu_time_us.to_string()])
349+
.stdin(Stdio::inherit())
350+
.stdout(Stdio::inherit())
351+
.stderr(Stdio::inherit())
352+
.uid(self.uid())
353+
.gid(self.gid())
354+
.args(&self.extra_args)
355+
.exec()
356+
}
357+
281358
#[cfg(target_arch = "aarch64")]
282359
fn copy_cache_info(&self) -> Result<()> {
283360
use crate::{readln_special, to_cstring, writeln_special};
@@ -447,19 +524,12 @@ impl Env {
447524
.map_err(Error::CloseDevNullFd)?;
448525
}
449526

450-
Err(Error::Exec(
451-
Command::new(chroot_exec_file)
452-
.args(&["--id", &self.id])
453-
.args(&["--start-time-us", &self.start_time_us.to_string()])
454-
.args(&["--start-time-cpu-us", &self.start_time_cpu_us.to_string()])
455-
.stdin(Stdio::inherit())
456-
.stdout(Stdio::inherit())
457-
.stderr(Stdio::inherit())
458-
.uid(self.uid())
459-
.gid(self.gid())
460-
.args(self.extra_args)
461-
.exec(),
462-
))
527+
// If specified, exec the provided binary into a new PID namespace.
528+
if self.new_pid_ns {
529+
self.exec_into_new_pid_ns(chroot_exec_file)
530+
} else {
531+
Err(Error::Exec(self.exec_command(chroot_exec_file)))
532+
}
463533
}
464534
}
465535

@@ -483,6 +553,7 @@ mod tests {
483553
pub chroot_base: &'a str,
484554
pub netns: Option<&'a str>,
485555
pub daemonize: bool,
556+
pub new_pid_ns: bool,
486557
pub cgroups: Vec<&'a str>,
487558
}
488559

@@ -497,6 +568,7 @@ mod tests {
497568
chroot_base: "/",
498569
netns: Some("zzzns"),
499570
daemonize: true,
571+
new_pid_ns: true,
500572
cgroups: vec!["cpu.shares=2", "cpuset.mems=0"],
501573
}
502574
}
@@ -537,6 +609,10 @@ mod tests {
537609
arg_vec.push("--daemonize".to_string());
538610
}
539611

612+
if arg_vals.new_pid_ns {
613+
arg_vec.push("--new-pid-ns".to_string());
614+
}
615+
540616
arg_vec
541617
}
542618

@@ -577,10 +653,12 @@ mod tests {
577653

578654
assert_eq!(good_env.netns, good_arg_vals.netns.map(String::from));
579655
assert!(good_env.daemonize);
656+
assert!(good_env.new_pid_ns);
580657

581658
let another_good_arg_vals = ArgVals {
582659
netns: None,
583660
daemonize: false,
661+
new_pid_ns: false,
584662
..good_arg_vals
585663
};
586664

@@ -590,6 +668,7 @@ mod tests {
590668
let another_good_env = Env::new(&args, 0, 0)
591669
.expect("This another new environment should be created successfully.");
592670
assert!(!another_good_env.daemonize);
671+
assert!(!another_good_env.new_pid_ns);
593672

594673
let base_invalid_arg_vals = ArgVals {
595674
daemonize: true,
@@ -796,6 +875,7 @@ mod tests {
796875
chroot_base: some_dir_path,
797876
netns: Some("zzzns"),
798877
daemonize: false,
878+
new_pid_ns: false,
799879
cgroups: Vec::new(),
800880
};
801881
fs::write(some_file_path, "some_content").unwrap();
@@ -939,4 +1019,19 @@ mod tests {
9391019
let entries = fs::read_dir(&index_dest_path).unwrap();
9401020
assert_eq!(entries.enumerate().count(), 6);
9411021
}
1022+
1023+
#[test]
1024+
fn test_save_exec_file_pid() {
1025+
let exec_file_name = "file";
1026+
let pid_file_name = "file.pid";
1027+
let pid = 1;
1028+
1029+
let mut env = create_env();
1030+
env.save_exec_file_pid(pid, PathBuf::from(exec_file_name))
1031+
.unwrap();
1032+
1033+
let stored_pid = fs::read_to_string(pid_file_name);
1034+
fs::remove_file(pid_file_name).unwrap();
1035+
assert_eq!(stored_pid.unwrap(), "1");
1036+
}
9421037
}

src/jailer/src/main.rs

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -64,6 +64,7 @@ pub enum Error {
6464
UmountOldRoot(io::Error),
6565
UnexpectedListenerFd(i32),
6666
UnshareNewNs(io::Error),
67+
UnshareNewPID(io::Error),
6768
UnsetCloexec(io::Error),
6869
Write(PathBuf, io::Error),
6970
}
@@ -200,6 +201,9 @@ impl fmt::Display for Error {
200201
UnshareNewNs(ref err) => {
201202
write!(f, "Failed to unshare into new mount namespace: {}", err)
202203
}
204+
UnshareNewPID(ref err) => {
205+
write!(f, "Failed to unshare into new PID namespace: {}", err)
206+
}
203207
UnsetCloexec(ref err) => write!(
204208
f,
205209
"Failed to unset the O_CLOEXEC flag on the socket fd: {}",
@@ -265,6 +269,11 @@ pub fn build_arg_parser() -> ArgParser<'static> {
265269
"Daemonize the jailer before exec, by invoking setsid(), and redirecting \
266270
the standard I/O file descriptors to /dev/null.",
267271
))
272+
.arg(
273+
Argument::new("new-pid-ns")
274+
.takes_value(false)
275+
.help("Exec into a new PID namespace."),
276+
)
268277
.arg(Argument::new("cgroup").allow_multiple(true).help(
269278
"Cgroup and value to be set by the jailer. It must follow this format: \
270279
<cgroup_file>=<value> (e.g cpu.shares=10). This argument can be used \
@@ -665,6 +674,10 @@ mod tests {
665674
format!("{}", Error::UnshareNewNs(io::Error::from_raw_os_error(42))),
666675
"Failed to unshare into new mount namespace: No message of desired type (os error 42)",
667676
);
677+
assert_eq!(
678+
format!("{}", Error::UnshareNewPID(io::Error::from_raw_os_error(42))),
679+
"Failed to unshare into new PID namespace: No message of desired type (os error 42)",
680+
);
668681
assert_eq!(
669682
format!("{}", Error::UnsetCloexec(io::Error::from_raw_os_error(42))),
670683
"Failed to unset the O_CLOEXEC flag on the socket fd: No message of desired type (os \

tests/framework/jailer.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,7 @@ class JailerContext:
3232
chroot_base = None
3333
netns = None
3434
daemonize = None
35+
new_pid_ns = None
3536
extra_args = None
3637
api_socket_name = None
3738
cgroups = None
@@ -46,6 +47,7 @@ def __init__(
4647
chroot_base=DEFAULT_CHROOT_PATH,
4748
netns=None,
4849
daemonize=True,
50+
new_pid_ns=False,
4951
cgroups=None,
5052
**extra_args
5153
):
@@ -63,6 +65,7 @@ def __init__(
6365
self.chroot_base = chroot_base
6466
self.netns = netns if netns is not None else jailer_id
6567
self.daemonize = daemonize
68+
self.new_pid_ns = new_pid_ns
6669
self.extra_args = extra_args
6770
self.api_socket_name = DEFAULT_USOCKET_NAME
6871
self.cgroups = cgroups
@@ -103,6 +106,8 @@ def construct_param_list(self):
103106
jailer_param_list.extend(['--netns', str(self.netns_file_path())])
104107
if self.daemonize:
105108
jailer_param_list.append('--daemonize')
109+
if self.new_pid_ns:
110+
jailer_param_list.append('--new-pid-ns')
106111
if self.cgroups is not None:
107112
for cgroup in self.cgroups:
108113
jailer_param_list.extend(['--cgroup', str(cgroup)])

tests/integration_tests/performance/test_process_startup_time.py

Lines changed: 14 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -14,9 +14,21 @@
1414
# TODO: Keep a `current` startup time in S3 and validate we don't regress
1515

1616

17-
def test_startup_time(test_microvm_with_api):
18-
"""Check the startup time for jailer and Firecracker up to socket bind."""
17+
def test_startup_time_new_pid_ns(test_microvm_with_api):
18+
"""Check startup time when jailer is spawned in a new PID namespace."""
1919
microvm = test_microvm_with_api
20+
microvm.bin_cloner_path = None
21+
microvm.jailer.new_pid_ns = True
22+
_test_startup_time(microvm)
23+
24+
25+
def test_startup_time_daemonize(test_microvm_with_api):
26+
"""Check startup time when jailer spawns Firecracker in a new PID ns."""
27+
microvm = test_microvm_with_api
28+
_test_startup_time(microvm)
29+
30+
31+
def _test_startup_time(microvm):
2032
microvm.spawn()
2133

2234
microvm.basic_config(vcpu_count=2, mem_size_mib=1024)

0 commit comments

Comments
 (0)