@@ -195,29 +195,29 @@ impl Mode {
195
195
}
196
196
}
197
197
198
- /// Non-byte literals should have `\xXX` escapes that are within the ASCII range.
199
- fn ascii_escapes_should_be_ascii ( self ) -> bool {
198
+ /// Are `\x80`..`\xff` allowed?
199
+ fn allow_high_bytes ( self ) -> bool {
200
200
match self {
201
- Char | Str => true ,
202
- Byte | ByteStr | CStr => false ,
201
+ Char | Str => false ,
202
+ Byte | ByteStr | CStr => true ,
203
203
RawStr | RawByteStr | RawCStr => unreachable ! ( ) ,
204
204
}
205
205
}
206
206
207
- /// Whether characters within the literal must be within the ASCII range.
207
+ /// Are unicode (non- ASCII) chars allowed?
208
208
#[ inline]
209
- fn chars_should_be_ascii ( self ) -> bool {
209
+ fn allow_unicode_chars ( self ) -> bool {
210
210
match self {
211
- Byte | ByteStr | RawByteStr => true ,
212
- Char | Str | RawStr | CStr | RawCStr => false ,
211
+ Byte | ByteStr | RawByteStr => false ,
212
+ Char | Str | RawStr | CStr | RawCStr => true ,
213
213
}
214
214
}
215
215
216
- /// Byte literals do not allow unicode escape.
217
- fn is_unicode_escape_disallowed ( self ) -> bool {
216
+ /// Are unicode escapes (`\u`) allowed?
217
+ fn allow_unicode_escapes ( self ) -> bool {
218
218
match self {
219
- Byte | ByteStr => true ,
220
- Char | Str | CStr => false ,
219
+ Byte | ByteStr => false ,
220
+ Char | Str | CStr => true ,
221
221
RawByteStr | RawStr | RawCStr => unreachable ! ( ) ,
222
222
}
223
223
}
@@ -255,25 +255,21 @@ fn scan_escape<T: From<char> + From<u8>>(
255
255
256
256
let value = ( hi * 16 + lo) as u8 ;
257
257
258
- return if mode. ascii_escapes_should_be_ascii ( ) && !value. is_ascii ( ) {
258
+ return if ! mode. allow_high_bytes ( ) && !value. is_ascii ( ) {
259
259
Err ( EscapeError :: OutOfRangeHexEscape )
260
260
} else {
261
261
// This may be a high byte, but that will only happen if `T` is
262
- // `MixedUnit`, because of the `ascii_escapes_should_be_ascii`
263
- // check above.
262
+ // `MixedUnit`, because of the `allow_high_bytes` check above.
264
263
Ok ( T :: from ( value as u8 ) )
265
264
} ;
266
265
}
267
- 'u' => return scan_unicode ( chars, mode. is_unicode_escape_disallowed ( ) ) . map ( T :: from) ,
266
+ 'u' => return scan_unicode ( chars, mode. allow_unicode_escapes ( ) ) . map ( T :: from) ,
268
267
_ => return Err ( EscapeError :: InvalidEscape ) ,
269
268
} ;
270
269
Ok ( T :: from ( res) )
271
270
}
272
271
273
- fn scan_unicode (
274
- chars : & mut Chars < ' _ > ,
275
- is_unicode_escape_disallowed : bool ,
276
- ) -> Result < char , EscapeError > {
272
+ fn scan_unicode ( chars : & mut Chars < ' _ > , allow_unicode_escapes : bool ) -> Result < char , EscapeError > {
277
273
// We've parsed '\u', now we have to parse '{..}'.
278
274
279
275
if chars. next ( ) != Some ( '{' ) {
@@ -301,7 +297,7 @@ fn scan_unicode(
301
297
302
298
// Incorrect syntax has higher priority for error reporting
303
299
// than unallowed value for a literal.
304
- if is_unicode_escape_disallowed {
300
+ if !allow_unicode_escapes {
305
301
return Err ( EscapeError :: UnicodeEscapeInByte ) ;
306
302
}
307
303
@@ -327,12 +323,8 @@ fn scan_unicode(
327
323
}
328
324
329
325
#[ inline]
330
- fn ascii_check ( c : char , chars_should_be_ascii : bool ) -> Result < char , EscapeError > {
331
- if chars_should_be_ascii && !c. is_ascii ( ) {
332
- Err ( EscapeError :: NonAsciiCharInByte )
333
- } else {
334
- Ok ( c)
335
- }
326
+ fn ascii_check ( c : char , allow_unicode_chars : bool ) -> Result < char , EscapeError > {
327
+ if allow_unicode_chars || c. is_ascii ( ) { Ok ( c) } else { Err ( EscapeError :: NonAsciiCharInByte ) }
336
328
}
337
329
338
330
fn unescape_char_or_byte ( chars : & mut Chars < ' _ > , mode : Mode ) -> Result < char , EscapeError > {
@@ -341,7 +333,7 @@ fn unescape_char_or_byte(chars: &mut Chars<'_>, mode: Mode) -> Result<char, Esca
341
333
'\\' => scan_escape ( chars, mode) ,
342
334
'\n' | '\t' | '\'' => Err ( EscapeError :: EscapeOnlyChar ) ,
343
335
'\r' => Err ( EscapeError :: BareCarriageReturn ) ,
344
- _ => ascii_check ( c, mode. chars_should_be_ascii ( ) ) ,
336
+ _ => ascii_check ( c, mode. allow_unicode_chars ( ) ) ,
345
337
} ?;
346
338
if chars. next ( ) . is_some ( ) {
347
339
return Err ( EscapeError :: MoreThanOneChar ) ;
@@ -356,7 +348,7 @@ where
356
348
F : FnMut ( Range < usize > , Result < T , EscapeError > ) ,
357
349
{
358
350
let mut chars = src. chars ( ) ;
359
- let chars_should_be_ascii = mode. chars_should_be_ascii ( ) ; // get this outside the loop
351
+ let allow_unicode_chars = mode. allow_unicode_chars ( ) ; // get this outside the loop
360
352
361
353
// The `start` and `end` computation here is complicated because
362
354
// `skip_ascii_whitespace` makes us to skip over chars without counting
@@ -381,7 +373,7 @@ where
381
373
}
382
374
'"' => Err ( EscapeError :: EscapeOnlyChar ) ,
383
375
'\r' => Err ( EscapeError :: BareCarriageReturn ) ,
384
- _ => ascii_check ( c, chars_should_be_ascii ) . map ( T :: from) ,
376
+ _ => ascii_check ( c, allow_unicode_chars ) . map ( T :: from) ,
385
377
} ;
386
378
let end = src. len ( ) - chars. as_str ( ) . len ( ) ;
387
379
callback ( start..end, res) ;
@@ -423,7 +415,7 @@ where
423
415
F : FnMut ( Range < usize > , Result < char , EscapeError > ) ,
424
416
{
425
417
let mut chars = src. chars ( ) ;
426
- let chars_should_be_ascii = mode. chars_should_be_ascii ( ) ; // get this outside the loop
418
+ let allow_unicode_chars = mode. allow_unicode_chars ( ) ; // get this outside the loop
427
419
428
420
// The `start` and `end` computation here matches the one in
429
421
// `unescape_non_raw_common` for consistency, even though this function
@@ -432,7 +424,7 @@ where
432
424
let start = src. len ( ) - chars. as_str ( ) . len ( ) - c. len_utf8 ( ) ;
433
425
let res = match c {
434
426
'\r' => Err ( EscapeError :: BareCarriageReturnInRawString ) ,
435
- _ => ascii_check ( c, chars_should_be_ascii ) ,
427
+ _ => ascii_check ( c, allow_unicode_chars ) ,
436
428
} ;
437
429
let end = src. len ( ) - chars. as_str ( ) . len ( ) ;
438
430
callback ( start..end, res) ;
0 commit comments