Skip to content

Add Unicode support for process spawning on Windows #14075

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 4 commits into from
May 14, 2014
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 9 additions & 8 deletions src/liblibc/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -208,7 +208,7 @@ pub use funcs::bsd43::{shutdown};
#[cfg(windows)] pub use consts::os::extra::{TRUE, FALSE, INFINITE};
#[cfg(windows)] pub use consts::os::extra::{PROCESS_TERMINATE, PROCESS_QUERY_INFORMATION};
#[cfg(windows)] pub use consts::os::extra::{STILL_ACTIVE, DETACHED_PROCESS};
#[cfg(windows)] pub use consts::os::extra::{CREATE_NEW_PROCESS_GROUP};
#[cfg(windows)] pub use consts::os::extra::{CREATE_NEW_PROCESS_GROUP, CREATE_UNICODE_ENVIRONMENT};
#[cfg(windows)] pub use consts::os::extra::{FILE_BEGIN, FILE_END, FILE_CURRENT};
#[cfg(windows)] pub use consts::os::extra::{FILE_GENERIC_READ, FILE_GENERIC_WRITE};
#[cfg(windows)] pub use consts::os::extra::{FILE_SHARE_READ, FILE_SHARE_WRITE, FILE_SHARE_DELETE};
Expand Down Expand Up @@ -1937,6 +1937,7 @@ pub mod consts {

pub static DETACHED_PROCESS: DWORD = 0x00000008;
pub static CREATE_NEW_PROCESS_GROUP: DWORD = 0x00000200;
pub static CREATE_UNICODE_ENVIRONMENT: DWORD = 0x00000400;

pub static PIPE_ACCESS_DUPLEX: DWORD = 0x00000003;
pub static PIPE_ACCESS_INBOUND: DWORD = 0x00000001;
Expand Down Expand Up @@ -4193,8 +4194,8 @@ pub mod funcs {
pub mod kernel32 {
use types::os::arch::c95::{c_uint};
use types::os::arch::extra::{BOOL, DWORD, SIZE_T, HMODULE,
LPCWSTR, LPWSTR, LPCSTR, LPSTR,
LPCH, LPDWORD, LPVOID,
LPCWSTR, LPWSTR,
LPWCH, LPDWORD, LPVOID,
LPCVOID, LPOVERLAPPED,
LPSECURITY_ATTRIBUTES,
LPSTARTUPINFO,
Expand All @@ -4211,8 +4212,8 @@ pub mod funcs {
-> DWORD;
pub fn SetEnvironmentVariableW(n: LPCWSTR, v: LPCWSTR)
-> BOOL;
pub fn GetEnvironmentStringsA() -> LPCH;
pub fn FreeEnvironmentStringsA(env_ptr: LPCH) -> BOOL;
pub fn GetEnvironmentStringsW() -> LPWCH;
pub fn FreeEnvironmentStringsW(env_ptr: LPWCH) -> BOOL;
pub fn GetModuleFileNameW(hModule: HMODULE,
lpFilename: LPWSTR,
nSize: DWORD)
Expand Down Expand Up @@ -4251,16 +4252,16 @@ pub mod funcs {
dwProcessId: DWORD)
-> HANDLE;
pub fn GetCurrentProcess() -> HANDLE;
pub fn CreateProcessA(lpApplicationName: LPCSTR,
lpCommandLine: LPSTR,
pub fn CreateProcessW(lpApplicationName: LPCWSTR,
lpCommandLine: LPWSTR,
lpProcessAttributes:
LPSECURITY_ATTRIBUTES,
lpThreadAttributes:
LPSECURITY_ATTRIBUTES,
bInheritHandles: BOOL,
dwCreationFlags: DWORD,
lpEnvironment: LPVOID,
lpCurrentDirectory: LPCSTR,
lpCurrentDirectory: LPCWSTR,
lpStartupInfo: LPSTARTUPINFO,
lpProcessInformation:
LPPROCESS_INFORMATION)
Expand Down
36 changes: 22 additions & 14 deletions src/libnative/io/process.rs
Original file line number Diff line number Diff line change
Expand Up @@ -258,7 +258,7 @@ fn spawn_process_os(config: p::ProcessConfig,
GetCurrentProcess,
DuplicateHandle,
CloseHandle,
CreateProcessA
CreateProcessW
};
use libc::funcs::extra::msvcrt::get_osfhandle;

Expand Down Expand Up @@ -318,15 +318,15 @@ fn spawn_process_os(config: p::ProcessConfig,
let mut create_err = None;

// stolen from the libuv code.
let mut flags = 0;
let mut flags = libc::CREATE_UNICODE_ENVIRONMENT;
if config.detach {
flags |= libc::DETACHED_PROCESS | libc::CREATE_NEW_PROCESS_GROUP;
}

with_envp(env, |envp| {
with_dirp(dir, |dirp| {
cmd.with_c_str(|cmdp| {
let created = CreateProcessA(ptr::null(), mem::transmute(cmdp),
os::win32::as_mut_utf16_p(cmd, |cmdp| {
let created = CreateProcessW(ptr::null(), cmdp,
ptr::mut_null(), ptr::mut_null(), TRUE,
flags, envp, dirp, &mut si,
&mut pi);
Expand Down Expand Up @@ -409,16 +409,17 @@ fn make_command_line(prog: &str, args: &[~str]) -> ~str {
if quote {
cmd.push_char('"');
}
for i in range(0u, arg.len()) {
append_char_at(cmd, arg, i);
let argvec: Vec<char> = arg.chars().collect();
for i in range(0u, argvec.len()) {
append_char_at(cmd, &argvec, i);
}
if quote {
cmd.push_char('"');
}
}

fn append_char_at(cmd: &mut StrBuf, arg: &str, i: uint) {
match arg[i] as char {
fn append_char_at(cmd: &mut StrBuf, arg: &Vec<char>, i: uint) {
match *arg.get(i) {
'"' => {
// Escape quotes.
cmd.push_str("\\\"");
Expand All @@ -438,11 +439,11 @@ fn make_command_line(prog: &str, args: &[~str]) -> ~str {
}
}

fn backslash_run_ends_in_quote(s: &str, mut i: uint) -> bool {
while i < s.len() && s[i] as char == '\\' {
fn backslash_run_ends_in_quote(s: &Vec<char>, mut i: uint) -> bool {
while i < s.len() && *s.get(i) == '\\' {
i += 1;
}
return i < s.len() && s[i] as char == '"';
return i < s.len() && *s.get(i) == '"';
}
}

Expand Down Expand Up @@ -691,7 +692,7 @@ fn with_envp<T>(env: Option<~[(~str, ~str)]>, cb: |*mut c_void| -> T) -> T {

for pair in env.iter() {
let kv = format!("{}={}", *pair.ref0(), *pair.ref1());
blk.push_all(kv.as_bytes());
blk.push_all(kv.to_utf16().as_slice());
blk.push(0);
}

Expand All @@ -704,9 +705,12 @@ fn with_envp<T>(env: Option<~[(~str, ~str)]>, cb: |*mut c_void| -> T) -> T {
}

#[cfg(windows)]
fn with_dirp<T>(d: Option<&Path>, cb: |*libc::c_char| -> T) -> T {
fn with_dirp<T>(d: Option<&Path>, cb: |*u16| -> T) -> T {
match d {
Some(dir) => dir.with_c_str(|buf| cb(buf)),
Some(dir) => match dir.as_str() {
Some(dir_str) => os::win32::as_utf16_p(dir_str, cb),
None => cb(ptr::null())
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This silently removes the working directory configuration if it's not in utf8 format (i.e., when dir.as_str() returns None). It should fail loudly, using expect.

Don't worry about changing this, though -- I'm currently rebasing another patch around this one, for dealing with non-unicode filesystems/arguments on other platforms, and I'll address the problem there.

},
None => cb(ptr::null())
}
}
Expand Down Expand Up @@ -860,5 +864,9 @@ mod tests {
make_command_line("echo", ["a b c".to_owned()]),
"echo \"a b c\"".to_owned()
);
assert_eq!(
make_command_line("\u03c0\u042f\u97f3\u00e6\u221e", []),
"\u03c0\u042f\u97f3\u00e6\u221e".to_owned()
);
}
}
46 changes: 36 additions & 10 deletions src/libstd/os.rs
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@
use clone::Clone;
use container::Container;
use libc;
use libc::{c_char, c_void, c_int};
use libc::{c_void, c_int};
use option::{Some, None, Option};
use os;
use ops::Drop;
Expand All @@ -49,6 +49,8 @@ use vec::Vec;

#[cfg(unix)]
use c_str::ToCStr;
#[cfg(unix)]
use libc::c_char;
#[cfg(windows)]
use str::OwnedStr;

Expand Down Expand Up @@ -141,10 +143,14 @@ pub mod win32 {
}

pub fn as_utf16_p<T>(s: &str, f: |*u16| -> T) -> T {
as_mut_utf16_p(s, |t| { f(t as *u16) })
}

pub fn as_mut_utf16_p<T>(s: &str, f: |*mut u16| -> T) -> T {
let mut t = s.to_utf16();
// Null terminate before passing on.
t.push(0u16);
f(t.as_ptr())
f(t.as_mut_ptr())
}
}

Expand Down Expand Up @@ -182,22 +188,42 @@ pub fn env_as_bytes() -> Vec<(~[u8],~[u8])> {
unsafe {
#[cfg(windows)]
unsafe fn get_env_pairs() -> Vec<~[u8]> {
use c_str;
use slice::raw;

use libc::funcs::extra::kernel32::{
GetEnvironmentStringsA,
FreeEnvironmentStringsA
GetEnvironmentStringsW,
FreeEnvironmentStringsW
};
let ch = GetEnvironmentStringsA();
let ch = GetEnvironmentStringsW();
if ch as uint == 0 {
fail!("os::env() failure getting env string from OS: {}",
os::last_os_error());
}
// Here, we lossily decode the string as UTF16.
//
// The docs suggest that the result should be in Unicode, but
// Windows doesn't guarantee it's actually UTF16 -- it doesn't
// validate the environment string passed to CreateProcess nor
// SetEnvironmentVariable. Yet, it's unlikely that returning a
// raw u16 buffer would be of practical use since the result would
// be inherently platform-dependent and introduce additional
// complexity to this code.
//
// Using the non-Unicode version of GetEnvironmentStrings is even
// worse since the result is in an OEM code page. Characters that
// can't be encoded in the code page would be turned into question
// marks.
let mut result = Vec::new();
c_str::from_c_multistring(ch as *c_char, None, |cstr| {
result.push(cstr.as_bytes_no_nul().to_owned());
});
FreeEnvironmentStringsA(ch);
let mut i = 0;
while *ch.offset(i) != 0 {
let p = &*ch.offset(i);
let len = ptr::position(p, |c| *c == 0);
raw::buf_as_slice(p, len, |s| {
result.push(str::from_utf16_lossy(s).into_bytes());
});
i += len as int + 1;
}
FreeEnvironmentStringsW(ch);
result
}
#[cfg(unix)]
Expand Down
86 changes: 86 additions & 0 deletions src/test/run-pass/process-spawn-with-unicode-params.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,86 @@
// Copyright 2014 The Rust Project Developers. See the COPYRIGHT
// file at the top-level directory of this distribution and at
// http://rust-lang.org/COPYRIGHT.
//
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
// option. This file may not be copied, modified, or distributed
// except according to those terms.

// no-prefer-dynamic

// The test copies itself into a subdirectory with a non-ASCII name and then
// runs it as a child process within the subdirectory. The parent process
// also adds an environment variable and an argument, both containing
// non-ASCII characters. The child process ensures all the strings are
// intact.

extern crate native;

use std::io;
use std::io::fs;
use std::io::process::Process;
use std::io::process::ProcessConfig;
use std::os;
use std::path::Path;

fn main() {
let my_args = os::args();
let my_cwd = os::getcwd();
let my_env = os::env();
let my_path = Path::new(os::self_exe_name().unwrap());
let my_dir = my_path.dir_path();
let my_ext = my_path.extension_str().unwrap_or("");

// some non-ASCII characters
let blah = "\u03c0\u042f\u97f3\u00e6\u221e";

let child_name = "child";
let child_dir = "process-spawn-with-unicode-params-" + blah;

// parameters sent to child / expected to be received from parent
let arg = blah;
let cwd = my_dir.join(Path::new(child_dir.clone()));
let env = ("RUST_TEST_PROC_SPAWN_UNICODE".to_owned(), blah.to_owned());

// am I the parent or the child?
if my_args.len() == 1 { // parent

let child_filestem = Path::new(child_name);
let child_filename = child_filestem.with_extension(my_ext);
let child_path = cwd.join(child_filename.clone());

// make a separate directory for the child
drop(fs::mkdir(&cwd, io::UserRWX).is_ok());
assert!(fs::copy(&my_path, &child_path).is_ok());

// run child
let p = Process::configure(ProcessConfig {
program: child_path.as_str().unwrap(),
args: [arg.to_owned()],
cwd: Some(&cwd),
env: Some(my_env.append_one(env).as_slice()),
.. ProcessConfig::new()
}).unwrap().wait_with_output();

// display the output
assert!(io::stdout().write(p.output.as_slice()).is_ok());
assert!(io::stderr().write(p.error.as_slice()).is_ok());

// make sure the child succeeded
assert!(p.status.success());

} else { // child

// check working directory (don't try to compare with `cwd` here!)
assert!(my_cwd.ends_with_path(&Path::new(child_dir)));

// check arguments
assert_eq!(my_args.get(1).as_slice(), arg);

// check environment variable
assert!(my_env.contains(&env));

};
}