Skip to content

Commit 15dd158

Browse files
committed
Auto merge of #1256 - JOE1994:rw_widestr_helpers, r=RalfJung
Helper functions for env_var emulation in Windows. Moving some of the changes submitted in PR #1225 here, in order to prevent the original PR from bloating too much.
2 parents e5f1a29 + 5f9167b commit 15dd158

File tree

2 files changed

+123
-12
lines changed

2 files changed

+123
-12
lines changed

src/helpers.rs

Lines changed: 112 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
use std::ffi::OsStr;
1+
use std::ffi::{OsStr, OsString};
22
use std::{iter, mem};
33
use std::convert::TryFrom;
44

@@ -456,6 +456,18 @@ pub trait EvalContextExt<'mir, 'tcx: 'mir>: crate::MiriEvalContextExt<'mir, 'tcx
456456
}
457457
}
458458

459+
/// Dispatches to the appropriate implementation for reading an OsString from memory,
460+
/// depending on the interpretation target.
461+
/// FIXME: Use `Cow` to avoid copies
462+
fn read_os_str_from_target_str(&self, scalar: Scalar<Tag>) -> InterpResult<'tcx, OsString> {
463+
let target_os = self.eval_context_ref().tcx.sess.target.target.target_os.as_str();
464+
match target_os {
465+
"linux" | "macos" => self.read_os_str_from_c_str(scalar).map(|x| x.to_os_string()),
466+
"windows" => self.read_os_str_from_wide_str(scalar),
467+
unsupported => throw_unsup_format!("OsString support for target OS `{}` not yet available", unsupported),
468+
}
469+
}
470+
459471
/// Helper function to read an OsString from a null-terminated sequence of bytes, which is what
460472
/// the Unix APIs usually handle.
461473
fn read_os_str_from_c_str<'a>(&'a self, scalar: Scalar<Tag>) -> InterpResult<'tcx, &'a OsStr>
@@ -471,14 +483,37 @@ pub trait EvalContextExt<'mir, 'tcx: 'mir>: crate::MiriEvalContextExt<'mir, 'tcx
471483
fn bytes_to_os_str<'tcx, 'a>(bytes: &'a [u8]) -> InterpResult<'tcx, &'a OsStr> {
472484
let s = std::str::from_utf8(bytes)
473485
.map_err(|_| err_unsup_format!("{:?} is not a valid utf-8 string", bytes))?;
474-
Ok(&OsStr::new(s))
486+
Ok(OsStr::new(s))
475487
}
476488

477489
let this = self.eval_context_ref();
478490
let bytes = this.memory.read_c_str(scalar)?;
479491
bytes_to_os_str(bytes)
480492
}
481493

494+
/// Helper function to read an OsString from a 0x0000-terminated sequence of u16,
495+
/// which is what the Windows APIs usually handle.
496+
fn read_os_str_from_wide_str<'a>(&'a self, scalar: Scalar<Tag>) -> InterpResult<'tcx, OsString>
497+
where
498+
'tcx: 'a,
499+
'mir: 'a,
500+
{
501+
#[cfg(windows)]
502+
pub fn u16vec_to_osstring<'tcx, 'a>(u16_vec: Vec<u16>) -> InterpResult<'tcx, OsString> {
503+
Ok(std::os::windows::ffi::OsStringExt::from_wide(&u16_vec[..]))
504+
}
505+
#[cfg(not(windows))]
506+
pub fn u16vec_to_osstring<'tcx, 'a>(u16_vec: Vec<u16>) -> InterpResult<'tcx, OsString> {
507+
let s = String::from_utf16(&u16_vec[..])
508+
.map_err(|_| err_unsup_format!("{:?} is not a valid utf-16 string", u16_vec))?;
509+
Ok(s.into())
510+
}
511+
512+
let u16_vec = self.eval_context_ref().memory.read_wide_str(scalar)?;
513+
u16vec_to_osstring(u16_vec)
514+
}
515+
516+
482517
/// Helper function to write an OsStr as a null-terminated sequence of bytes, which is what
483518
/// the Unix APIs usually handle. This function returns `Ok((false, length))` without trying
484519
/// to write if `size` is not large enough to fit the contents of `os_string` plus a null
@@ -518,6 +553,66 @@ pub trait EvalContextExt<'mir, 'tcx: 'mir>: crate::MiriEvalContextExt<'mir, 'tcx
518553
Ok((true, string_length))
519554
}
520555

556+
/// Helper function to write an OsStr as a 0x0000-terminated u16-sequence, which is what
557+
/// the Windows APIs usually handle. This function returns `Ok((false, length))` without trying
558+
/// to write if `size` is not large enough to fit the contents of `os_str` plus a null
559+
/// terminator. It returns `Ok((true, length))` if the writing process was successful. The
560+
/// string length returned does not include the null terminator.
561+
fn write_os_str_to_wide_str(
562+
&mut self,
563+
os_str: &OsStr,
564+
mplace: MPlaceTy<'tcx, Tag>,
565+
size: u64,
566+
) -> InterpResult<'tcx, (bool, u64)> {
567+
#[cfg(windows)]
568+
fn os_str_to_u16vec<'tcx>(os_str: &OsStr) -> InterpResult<'tcx, Vec<u16>> {
569+
Ok(std::os::windows::ffi::OsStrExt::encode_wide(os_str).collect())
570+
}
571+
#[cfg(not(windows))]
572+
fn os_str_to_u16vec<'tcx>(os_str: &OsStr) -> InterpResult<'tcx, Vec<u16>> {
573+
// On non-Windows platforms the best we can do to transform Vec<u16> from/to OS strings is to do an
574+
// intermediate transformation into strings, which rejects non-UTF-8 paths that are actually
575+
// valid.
576+
os_str
577+
.to_str()
578+
.map(|s| s.encode_utf16().collect())
579+
.ok_or_else(|| err_unsup_format!("{:?} is not a valid utf-8 string", os_str).into())
580+
}
581+
582+
let u16_vec = os_str_to_u16vec(os_str)?;
583+
// If `size` is smaller than or equal to `u16_vec.len()`, writing `u16_vec` plus the required
584+
// 0x0000 terminator to memory would cause an out-of-bounds access.
585+
let string_length = u64::try_from(u16_vec.len()).unwrap();
586+
if size <= string_length {
587+
return Ok((false, string_length));
588+
}
589+
590+
let this = self.eval_context_mut();
591+
592+
// Store the UTF-16 string.
593+
let char_size = Size::from_bytes(2);
594+
for (idx, c) in u16_vec.into_iter().chain(iter::once(0x0000)).enumerate() {
595+
let place = this.mplace_field(mplace, idx as u64)?;
596+
this.write_scalar(Scalar::from_uint(c, char_size), place.into())?;
597+
}
598+
Ok((true, string_length))
599+
}
600+
601+
/// Dispatches to the appropriate implementation for allocating & writing an OsString in memory,
602+
/// depending on the interpretation target.
603+
fn alloc_os_str_as_target_str(
604+
&mut self,
605+
os_str: &OsStr,
606+
memkind: MemoryKind<MiriMemoryKind>,
607+
) -> InterpResult<'tcx, Pointer<Tag>> {
608+
let target_os = self.eval_context_ref().tcx.sess.target.target.target_os.as_str();
609+
match target_os {
610+
"linux" | "macos" => Ok(self.alloc_os_str_as_c_str(os_str, memkind)),
611+
"windows" => Ok(self.alloc_os_str_as_wide_str(os_str, memkind)),
612+
unsupported => throw_unsup_format!("OsString support for target OS `{}` not yet available", unsupported),
613+
}
614+
}
615+
521616
fn alloc_os_str_as_c_str(
522617
&mut self,
523618
os_str: &OsStr,
@@ -528,7 +623,21 @@ pub trait EvalContextExt<'mir, 'tcx: 'mir>: crate::MiriEvalContextExt<'mir, 'tcx
528623

529624
let arg_type = this.tcx.mk_array(this.tcx.types.u8, size);
530625
let arg_place = this.allocate(this.layout_of(arg_type).unwrap(), memkind);
531-
self.write_os_str_to_c_str(os_str, arg_place.ptr, size).unwrap();
626+
assert!(self.write_os_str_to_c_str(os_str, arg_place.ptr, size).unwrap().0);
627+
arg_place.ptr.assert_ptr()
628+
}
629+
630+
fn alloc_os_str_as_wide_str(
631+
&mut self,
632+
os_str: &OsStr,
633+
memkind: MemoryKind<MiriMemoryKind>,
634+
) -> Pointer<Tag> {
635+
let size = u64::try_from(os_str.len()).unwrap().checked_add(1).unwrap(); // Make space for `0x0000` terminator.
636+
let this = self.eval_context_mut();
637+
638+
let arg_type = this.tcx.mk_array(this.tcx.types.u16, size);
639+
let arg_place = this.allocate(this.layout_of(arg_type).unwrap(), memkind);
640+
assert!(self.write_os_str_to_wide_str(os_str, arg_place, size).unwrap().0);
532641
arg_place.ptr.assert_ptr()
533642
}
534643
}

src/shims/env.rs

Lines changed: 11 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@ use rustc_mir::interpret::Pointer;
1313
#[derive(Default)]
1414
pub struct EnvVars<'tcx> {
1515
/// Stores pointers to the environment variables. These variables must be stored as
16-
/// null-terminated C strings with the `"{name}={value}"` format.
16+
/// null-terminated target strings (c_str or wide_str) with the `"{name}={value}"` format.
1717
map: FxHashMap<OsString, Pointer<Tag>>,
1818

1919
/// Place where the `environ` static is stored. Lazily initialized, but then never changes.
@@ -29,7 +29,7 @@ impl<'tcx> EnvVars<'tcx> {
2929
for (name, value) in env::vars() {
3030
if !excluded_env_vars.contains(&name) {
3131
let var_ptr =
32-
alloc_env_var_as_c_str(name.as_ref(), value.as_ref(), ecx);
32+
alloc_env_var_as_target_str(name.as_ref(), value.as_ref(), ecx)?;
3333
ecx.machine.env_vars.map.insert(OsString::from(name), var_ptr);
3434
}
3535
}
@@ -38,21 +38,23 @@ impl<'tcx> EnvVars<'tcx> {
3838
}
3939
}
4040

41-
fn alloc_env_var_as_c_str<'mir, 'tcx>(
41+
fn alloc_env_var_as_target_str<'mir, 'tcx>(
4242
name: &OsStr,
4343
value: &OsStr,
4444
ecx: &mut InterpCx<'mir, 'tcx, Evaluator<'tcx>>,
45-
) -> Pointer<Tag> {
45+
) -> InterpResult<'tcx, Pointer<Tag>> {
4646
let mut name_osstring = name.to_os_string();
4747
name_osstring.push("=");
4848
name_osstring.push(value);
49-
ecx.alloc_os_str_as_c_str(name_osstring.as_os_str(), MiriMemoryKind::Machine.into())
49+
Ok(ecx.alloc_os_str_as_target_str(name_osstring.as_os_str(), MiriMemoryKind::Machine.into())?)
5050
}
5151

5252
impl<'mir, 'tcx> EvalContextExt<'mir, 'tcx> for crate::MiriEvalContext<'mir, 'tcx> {}
5353
pub trait EvalContextExt<'mir, 'tcx: 'mir>: crate::MiriEvalContextExt<'mir, 'tcx> {
5454
fn getenv(&mut self, name_op: OpTy<'tcx, Tag>) -> InterpResult<'tcx, Scalar<Tag>> {
5555
let this = self.eval_context_mut();
56+
let target_os = this.tcx.sess.target.target.target_os.as_str();
57+
assert!(target_os == "linux" || target_os == "macos", "`{}` is only available for the UNIX target family");
5658

5759
let name_ptr = this.read_scalar(name_op)?.not_undef()?;
5860
let name = this.read_os_str_from_c_str(name_ptr)?;
@@ -74,17 +76,17 @@ pub trait EvalContextExt<'mir, 'tcx: 'mir>: crate::MiriEvalContextExt<'mir, 'tcx
7476

7577
let name_ptr = this.read_scalar(name_op)?.not_undef()?;
7678
let value_ptr = this.read_scalar(value_op)?.not_undef()?;
77-
let value = this.read_os_str_from_c_str(value_ptr)?;
79+
let value = this.read_os_str_from_target_str(value_ptr)?;
7880
let mut new = None;
7981
if !this.is_null(name_ptr)? {
80-
let name = this.read_os_str_from_c_str(name_ptr)?;
82+
let name = this.read_os_str_from_target_str(name_ptr)?;
8183
if !name.is_empty() && !name.to_string_lossy().contains('=') {
8284
new = Some((name.to_owned(), value.to_owned()));
8385
}
8486
}
8587
if let Some((name, value)) = new {
86-
let var_ptr = alloc_env_var_as_c_str(&name, &value, &mut this);
87-
if let Some(var) = this.machine.env_vars.map.insert(name.to_owned(), var_ptr) {
88+
let var_ptr = alloc_env_var_as_target_str(&name, &value, &mut this)?;
89+
if let Some(var) = this.machine.env_vars.map.insert(name, var_ptr) {
8890
this.memory
8991
.deallocate(var, None, MiriMemoryKind::Machine.into())?;
9092
}

0 commit comments

Comments
 (0)