Skip to content

Commit b09803e

Browse files
committed
Address review comments
1 parent f411852 commit b09803e

File tree

1 file changed

+36
-22
lines changed

1 file changed

+36
-22
lines changed

src/libstd/sys/windows/stdio.rs

+36-22
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
#![unstable(issue = "0", feature = "windows_stdio")]
22

33
use cell::Cell;
4+
use char::decode_utf16;
45
use cmp;
56
use io;
67
use ptr;
@@ -64,22 +65,27 @@ fn write(handle_id: c::DWORD, data: &[u8]) -> io::Result<usize> {
6465
//
6566
// If the data is not valid UTF-8 we write out as many bytes as are valid.
6667
// Only when there are no valid bytes (which will happen on the next call), return an error.
67-
let len = cmp::min(data.len(), MAX_BUFFER_SIZE);
68+
let len = cmp::min(data.len(), MAX_BUFFER_SIZE / 2);
6869
let utf8 = match str::from_utf8(&data[..len]) {
6970
Ok(s) => s,
7071
Err(ref e) if e.valid_up_to() == 0 => {
7172
return Err(io::Error::new(io::ErrorKind::InvalidData,
72-
"Windows stdio in console mode does not support non-UTF-8 byte sequences; \
73-
see https://github.com/rust-lang/rust/issues/23344"))
73+
"Windows stdio in console mode does not support writing non-UTF-8 byte sequences"))
7474
},
7575
Err(e) => str::from_utf8(&data[..e.valid_up_to()]).unwrap(),
7676
};
77-
let utf16 = utf8.encode_utf16().collect::<Vec<u16>>();
77+
let mut utf16 = [0u16; MAX_BUFFER_SIZE / 2];
78+
let mut len_utf16 = 0;
79+
for (chr, dest) in utf8.encode_utf16().zip(utf16.iter_mut()) {
80+
*dest = chr;
81+
len_utf16 += 1;
82+
}
83+
let utf16 = &utf16[..len_utf16];
7884

7985
let mut written = write_u16s(handle, &utf16)?;
8086

8187
// Figure out how many bytes of UTF-8 were written away as UTF-16.
82-
if written >= utf16.len() {
88+
if written == utf16.len() {
8389
Ok(utf8.len())
8490
} else {
8591
// Make sure we didn't end up writing only half of a surrogate pair (even though the chance
@@ -90,7 +96,7 @@ fn write(handle_id: c::DWORD, data: &[u8]) -> io::Result<usize> {
9096
let first_char_remaining = utf16[written];
9197
if first_char_remaining >= 0xDCEE && first_char_remaining <= 0xDFFF { // low surrogate
9298
// We just hope this works, and give up otherwise
93-
let _ = write_u16s(handle, &utf16[written..written]);
99+
let _ = write_u16s(handle, &utf16[written..written+1]);
94100
written += 1;
95101
}
96102
// Calculate the number of bytes of `utf8` that were actually written.
@@ -103,6 +109,7 @@ fn write(handle_id: c::DWORD, data: &[u8]) -> io::Result<usize> {
103109
_ => 3,
104110
};
105111
}
112+
debug_assert!(String::from_utf16(&utf16[..written]).unwrap() == utf8[..count]);
106113
Ok(count)
107114
}
108115
}
@@ -137,7 +144,7 @@ impl Stdin {
137144
return Ok(0);
138145
} else if buf.len() < 4 {
139146
return Err(io::Error::new(io::ErrorKind::InvalidInput,
140-
"Windows stdin in console mode does not support a buffer too small to; \
147+
"Windows stdin in console mode does not support a buffer too small to \
141148
guarantee holding one arbitrary UTF-8 character (4 bytes)"))
142149
}
143150

@@ -147,27 +154,14 @@ impl Stdin {
147154
// lost.
148155
let amount = cmp::min(buf.len() / 3, utf16_buf.len());
149156
let read = self.read_u16s_fixup_surrogates(handle, &mut utf16_buf, amount)?;
150-
let utf16 = &utf16_buf[..read];
151157

152-
// FIXME: it would be nice if we could directly decode into the buffer instead of doing an
153-
// allocation.
154-
let data = match String::from_utf16(&utf16) {
155-
Ok(utf8) => utf8.into_bytes(),
156-
Err(..) => {
157-
// We can't really do any better than forget all data and return an error.
158-
return Err(io::Error::new(io::ErrorKind::InvalidData,
159-
"Windows stdin in console mode does not support non-UTF-16 input; \
160-
encountered unpaired surrogate"))
161-
},
162-
};
163-
buf.copy_from_slice(&data);
164-
Ok(data.len())
158+
utf16_to_utf8(&utf16_buf[..read], buf)
165159
}
166160

167161
// We assume that if the last `u16` is an unpaired surrogate they got sliced apart by our
168162
// buffer size, and keep it around for the next read hoping to put them together.
169163
// This is a best effort, and may not work if we are not the only reader on Stdin.
170-
pub fn read_u16s_fixup_surrogates(&self, handle: c::HANDLE, buf: &mut [u16], mut amount: usize)
164+
fn read_u16s_fixup_surrogates(&self, handle: c::HANDLE, buf: &mut [u16], mut amount: usize)
171165
-> io::Result<usize>
172166
{
173167
// Insert possibly remaining unpaired surrogate from last read.
@@ -223,6 +217,26 @@ fn read_u16s(handle: c::HANDLE, buf: &mut [u16]) -> io::Result<usize> {
223217
Ok(amount as usize)
224218
}
225219

220+
/// Decodes `utf16` and writes the equivalent UTF-8 bytes to the front of `utf8`.
///
/// Returns the number of bytes written to `utf8`.
///
/// # Errors
///
/// * `io::ErrorKind::InvalidData` if `utf16` contains an unpaired surrogate.
/// * `io::ErrorKind::InvalidInput` if `utf8` is too small to hold the next decoded character.
///   (Previously this case panicked inside `char::encode_utf8`.)
///
/// On error, the bytes of any characters decoded before the failure have already been stored in
/// `utf8`; no count is reported for them.
// NOTE(review): this helper is called from `Stdin::read` in this file, so the `allow(unused)`
// below may no longer be necessary -- confirm before removing it.
#[allow(unused)]
fn utf16_to_utf8(utf16: &[u16], utf8: &mut [u8]) -> io::Result<usize> {
    let mut written = 0;
    for chr in decode_utf16(utf16.iter().cloned()) {
        let chr = match chr {
            Ok(chr) => chr,
            Err(_) => {
                // We can't really do any better than forget all data and return an error.
                return Err(io::Error::new(io::ErrorKind::InvalidData,
                    "Windows stdin in console mode does not support non-UTF-16 input; \
                    encountered unpaired surrogate"))
            }
        };

        // Carve out exactly the bytes this character needs. A checked sub-slice lets us report
        // an undersized buffer as an error instead of panicking in `encode_utf8`.
        let end = written + chr.len_utf8();
        match utf8.get_mut(written..end) {
            Some(dest) => {
                chr.encode_utf8(dest);
            }
            None => {
                return Err(io::Error::new(io::ErrorKind::InvalidInput,
                    "buffer too small to hold the UTF-8 encoding of the UTF-16 input"))
            }
        }
        written = end;
    }
    Ok(written)
}
239+
226240
impl Stdout {
227241
pub fn new() -> io::Result<Stdout> {
228242
Ok(Stdout)

0 commit comments

Comments
 (0)