Skip to content

Commit 12e6b53

Browse files
committed
Auto merge of #45711 - tirr-c:unicode-span, r=estebank
Display spans correctly when there are zero-width or wide characters

Hopefully...

* fixes #45211
* fixes #8706

---

Before:

```
error: invalid width `7` for integer literal
  --> unicode_2.rs:12:25
   |
12 |     let _ = ("a̐éö̲", 0u7);
   |                         ^^^
   |
   = help: valid widths are 8, 16, 32, 64 and 128

error: invalid width `42` for integer literal
  --> unicode_2.rs:13:20
   |
13 |     let _ = ("아あ", 1i42);
   |                    ^^^^
   |
   = help: valid widths are 8, 16, 32, 64 and 128

error: aborting due to 2 previous errors
```

After:

```
error: invalid width `7` for integer literal
  --> unicode_2.rs:12:25
   |
12 |     let _ = ("a̐éö̲", 0u7);
   |                     ^^^
   |
   = help: valid widths are 8, 16, 32, 64 and 128

error: invalid width `42` for integer literal
  --> unicode_2.rs:13:20
   |
13 |     let _ = ("아あ", 1i42);
   |                      ^^^^
   |
   = help: valid widths are 8, 16, 32, 64 and 128

error: aborting due to 2 previous errors
```

Spans might display incorrectly on the browser.

r? @estebank
2 parents d762b1d + 272c2fa commit 12e6b53

File tree

14 files changed

+231
-16
lines changed

14 files changed

+231
-16
lines changed

src/Cargo.lock

+1
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

src/librustc/ich/impls_syntax.rs

+16
Original file line numberDiff line numberDiff line change
@@ -364,6 +364,7 @@ impl<'gcx> HashStable<StableHashingContext<'gcx>> for FileMap {
364364
end_pos: _,
365365
ref lines,
366366
ref multibyte_chars,
367+
ref non_narrow_chars,
367368
} = *self;
368369

369370
name.hash_stable(hcx, hasher);
@@ -389,6 +390,12 @@ impl<'gcx> HashStable<StableHashingContext<'gcx>> for FileMap {
389390
for &char_pos in multibyte_chars.iter() {
390391
stable_multibyte_char(char_pos, start_pos).hash_stable(hcx, hasher);
391392
}
393+
394+
let non_narrow_chars = non_narrow_chars.borrow();
395+
non_narrow_chars.len().hash_stable(hcx, hasher);
396+
for &char_pos in non_narrow_chars.iter() {
397+
stable_non_narrow_char(char_pos, start_pos).hash_stable(hcx, hasher);
398+
}
392399
}
393400
}
394401

@@ -408,3 +415,12 @@ fn stable_multibyte_char(mbc: ::syntax_pos::MultiByteChar,
408415

409416
(pos.0 - filemap_start.0, bytes as u32)
410417
}
418+
419+
fn stable_non_narrow_char(swc: ::syntax_pos::NonNarrowChar,
420+
filemap_start: ::syntax_pos::BytePos)
421+
-> (u32, u32) {
422+
let pos = swc.pos();
423+
let width = swc.width();
424+
425+
(pos.0 - filemap_start.0, width as u32)
426+
}

src/librustc_errors/emitter.rs

+7-7
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@
1010

1111
use self::Destination::*;
1212

13-
use syntax_pos::{DUMMY_SP, FileMap, Span, MultiSpan, CharPos};
13+
use syntax_pos::{DUMMY_SP, FileMap, Span, MultiSpan};
1414

1515
use {Level, CodeSuggestion, DiagnosticBuilder, SubDiagnostic, CodeMapper, DiagnosticId};
1616
use RenderSpan::*;
@@ -201,17 +201,17 @@ impl EmitterWriter {
201201
// 6..7. This is degenerate input, but it's best to degrade
202202
// gracefully -- and the parser likes to supply a span like
203203
// that for EOF, in particular.
204-
if lo.col == hi.col && lo.line == hi.line {
205-
hi.col = CharPos(lo.col.0 + 1);
204+
if lo.col_display == hi.col_display && lo.line == hi.line {
205+
hi.col_display += 1;
206206
}
207207

208208
let ann_type = if lo.line != hi.line {
209209
let ml = MultilineAnnotation {
210210
depth: 1,
211211
line_start: lo.line,
212212
line_end: hi.line,
213-
start_col: lo.col.0,
214-
end_col: hi.col.0,
213+
start_col: lo.col_display,
214+
end_col: hi.col_display,
215215
is_primary: span_label.is_primary,
216216
label: span_label.label.clone(),
217217
};
@@ -221,8 +221,8 @@ impl EmitterWriter {
221221
AnnotationType::Singleline
222222
};
223223
let ann = Annotation {
224-
start_col: lo.col.0,
225-
end_col: hi.col.0,
224+
start_col: lo.col_display,
225+
end_col: hi.col_display,
226226
is_primary: span_label.is_primary,
227227
label: span_label.label.clone(),
228228
annotation_type: ann_type,

src/librustc_metadata/decoder.rs

+7-1
Original file line numberDiff line numberDiff line change
@@ -1189,6 +1189,7 @@ impl<'a, 'tcx> CrateMetadata {
11891189
end_pos,
11901190
lines,
11911191
multibyte_chars,
1192+
non_narrow_chars,
11921193
.. } = filemap_to_import;
11931194

11941195
let source_length = (end_pos - start_pos).to_usize();
@@ -1206,14 +1207,19 @@ impl<'a, 'tcx> CrateMetadata {
12061207
for mbc in &mut multibyte_chars {
12071208
mbc.pos = mbc.pos - start_pos;
12081209
}
1210+
let mut non_narrow_chars = non_narrow_chars.into_inner();
1211+
for swc in &mut non_narrow_chars {
1212+
*swc = *swc - start_pos;
1213+
}
12091214

12101215
let local_version = local_codemap.new_imported_filemap(name,
12111216
name_was_remapped,
12121217
self.cnum.as_u32(),
12131218
src_hash,
12141219
source_length,
12151220
lines,
1216-
multibyte_chars);
1221+
multibyte_chars,
1222+
non_narrow_chars);
12171223
debug!("CrateMetaData::imported_filemaps alloc \
12181224
filemap {:?} original (start_pos {:?} end_pos {:?}) \
12191225
translated (start_pos {:?} end_pos {:?})",

src/libsyntax/codemap.rs

+40-2
Original file line numberDiff line numberDiff line change
@@ -242,7 +242,8 @@ impl CodeMap {
242242
src_hash: u128,
243243
source_len: usize,
244244
mut file_local_lines: Vec<BytePos>,
245-
mut file_local_multibyte_chars: Vec<MultiByteChar>)
245+
mut file_local_multibyte_chars: Vec<MultiByteChar>,
246+
mut file_local_non_narrow_chars: Vec<NonNarrowChar>)
246247
-> Rc<FileMap> {
247248
let start_pos = self.next_start_pos();
248249
let mut files = self.files.borrow_mut();
@@ -258,6 +259,10 @@ impl CodeMap {
258259
mbc.pos = mbc.pos + start_pos;
259260
}
260261

262+
for swc in &mut file_local_non_narrow_chars {
263+
*swc = *swc + start_pos;
264+
}
265+
261266
let filemap = Rc::new(FileMap {
262267
name: filename,
263268
name_was_remapped,
@@ -270,6 +275,7 @@ impl CodeMap {
270275
end_pos,
271276
lines: RefCell::new(file_local_lines),
272277
multibyte_chars: RefCell::new(file_local_multibyte_chars),
278+
non_narrow_chars: RefCell::new(file_local_non_narrow_chars),
273279
});
274280

275281
files.push(filemap.clone());
@@ -297,6 +303,24 @@ impl CodeMap {
297303
let line = a + 1; // Line numbers start at 1
298304
let linebpos = (*f.lines.borrow())[a];
299305
let linechpos = self.bytepos_to_file_charpos(linebpos);
306+
let col = chpos - linechpos;
307+
308+
let col_display = {
309+
let non_narrow_chars = f.non_narrow_chars.borrow();
310+
let start_width_idx = non_narrow_chars
311+
.binary_search_by_key(&linebpos, |x| x.pos())
312+
.unwrap_or_else(|x| x);
313+
let end_width_idx = non_narrow_chars
314+
.binary_search_by_key(&pos, |x| x.pos())
315+
.unwrap_or_else(|x| x);
316+
let special_chars = end_width_idx - start_width_idx;
317+
let non_narrow: usize =
318+
non_narrow_chars[start_width_idx..end_width_idx]
319+
.into_iter()
320+
.map(|x| x.width())
321+
.sum();
322+
col.0 - special_chars + non_narrow
323+
};
300324
debug!("byte pos {:?} is on the line at byte pos {:?}",
301325
pos, linebpos);
302326
debug!("char pos {:?} is on the line at char pos {:?}",
@@ -306,14 +330,28 @@ impl CodeMap {
306330
Loc {
307331
file: f,
308332
line,
309-
col: chpos - linechpos,
333+
col,
334+
col_display,
310335
}
311336
}
312337
Err(f) => {
338+
let col_display = {
339+
let non_narrow_chars = f.non_narrow_chars.borrow();
340+
let end_width_idx = non_narrow_chars
341+
.binary_search_by_key(&pos, |x| x.pos())
342+
.unwrap_or_else(|x| x);
343+
let non_narrow: usize =
344+
non_narrow_chars[0..end_width_idx]
345+
.into_iter()
346+
.map(|x| x.width())
347+
.sum();
348+
chpos.0 - end_width_idx + non_narrow
349+
};
313350
Loc {
314351
file: f,
315352
line: 0,
316353
col: chpos,
354+
col_display,
317355
}
318356
}
319357
}

src/libsyntax/parse/lexer/mod.rs

+1
Original file line numberDiff line numberDiff line change
@@ -433,6 +433,7 @@ impl<'a> StringReader<'a> {
433433
self.filemap.record_multibyte_char(self.pos, new_ch_len);
434434
}
435435
}
436+
self.filemap.record_width(self.pos, new_ch);
436437
} else {
437438
self.ch = None;
438439
self.pos = new_pos;

src/libsyntax_pos/Cargo.toml

+1
Original file line numberDiff line numberDiff line change
@@ -11,3 +11,4 @@ crate-type = ["dylib"]
1111
[dependencies]
1212
serialize = { path = "../libserialize" }
1313
rustc_data_structures = { path = "../librustc_data_structures" }
14+
unicode-width = "0.1.4"

src/libsyntax_pos/lib.rs

+91-4
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,8 @@ use serialize::{Encodable, Decodable, Encoder, Decoder};
4444
extern crate serialize;
4545
extern crate serialize as rustc_serialize; // used by deriving
4646

47+
extern crate unicode_width;
48+
4749
pub mod hygiene;
4850
pub use hygiene::{SyntaxContext, ExpnInfo, ExpnFormat, NameAndSpan, CompilerDesugaringKind};
4951

@@ -494,6 +496,63 @@ pub struct MultiByteChar {
494496
pub bytes: usize,
495497
}
496498

499+
/// Identifies an offset of a non-narrow character in a FileMap
500+
#[derive(Copy, Clone, RustcEncodable, RustcDecodable, Eq, PartialEq)]
501+
pub enum NonNarrowChar {
502+
/// Represents a zero-width character
503+
ZeroWidth(BytePos),
504+
/// Represents a wide (fullwidth) character
505+
Wide(BytePos),
506+
}
507+
508+
impl NonNarrowChar {
509+
fn new(pos: BytePos, width: usize) -> Self {
510+
match width {
511+
0 => NonNarrowChar::ZeroWidth(pos),
512+
2 => NonNarrowChar::Wide(pos),
513+
_ => panic!("width {} given for non-narrow character", width),
514+
}
515+
}
516+
517+
/// Returns the absolute offset of the character in the CodeMap
518+
pub fn pos(&self) -> BytePos {
519+
match *self {
520+
NonNarrowChar::ZeroWidth(p) |
521+
NonNarrowChar::Wide(p) => p,
522+
}
523+
}
524+
525+
/// Returns the width of the character, 0 (zero-width) or 2 (wide)
526+
pub fn width(&self) -> usize {
527+
match *self {
528+
NonNarrowChar::ZeroWidth(_) => 0,
529+
NonNarrowChar::Wide(_) => 2,
530+
}
531+
}
532+
}
533+
534+
impl Add<BytePos> for NonNarrowChar {
535+
type Output = Self;
536+
537+
fn add(self, rhs: BytePos) -> Self {
538+
match self {
539+
NonNarrowChar::ZeroWidth(pos) => NonNarrowChar::ZeroWidth(pos + rhs),
540+
NonNarrowChar::Wide(pos) => NonNarrowChar::Wide(pos + rhs),
541+
}
542+
}
543+
}
544+
545+
impl Sub<BytePos> for NonNarrowChar {
546+
type Output = Self;
547+
548+
fn sub(self, rhs: BytePos) -> Self {
549+
match self {
550+
NonNarrowChar::ZeroWidth(pos) => NonNarrowChar::ZeroWidth(pos - rhs),
551+
NonNarrowChar::Wide(pos) => NonNarrowChar::Wide(pos - rhs),
552+
}
553+
}
554+
}
555+
497556
/// The state of the lazy external source loading mechanism of a FileMap.
498557
#[derive(PartialEq, Eq, Clone)]
499558
pub enum ExternalSource {
@@ -552,11 +611,13 @@ pub struct FileMap {
552611
pub lines: RefCell<Vec<BytePos>>,
553612
/// Locations of multi-byte characters in the source code
554613
pub multibyte_chars: RefCell<Vec<MultiByteChar>>,
614+
/// Width of characters that are not narrow in the source code
615+
pub non_narrow_chars: RefCell<Vec<NonNarrowChar>>,
555616
}
556617

557618
impl Encodable for FileMap {
558619
fn encode<S: Encoder>(&self, s: &mut S) -> Result<(), S::Error> {
559-
s.emit_struct("FileMap", 7, |s| {
620+
s.emit_struct("FileMap", 8, |s| {
560621
s.emit_struct_field("name", 0, |s| self.name.encode(s))?;
561622
s.emit_struct_field("name_was_remapped", 1, |s| self.name_was_remapped.encode(s))?;
562623
s.emit_struct_field("src_hash", 6, |s| self.src_hash.encode(s))?;
@@ -610,6 +671,9 @@ impl Encodable for FileMap {
610671
})?;
611672
s.emit_struct_field("multibyte_chars", 5, |s| {
612673
(*self.multibyte_chars.borrow()).encode(s)
674+
})?;
675+
s.emit_struct_field("non_narrow_chars", 7, |s| {
676+
(*self.non_narrow_chars.borrow()).encode(s)
613677
})
614678
})
615679
}
@@ -618,7 +682,7 @@ impl Encodable for FileMap {
618682
impl Decodable for FileMap {
619683
fn decode<D: Decoder>(d: &mut D) -> Result<FileMap, D::Error> {
620684

621-
d.read_struct("FileMap", 6, |d| {
685+
d.read_struct("FileMap", 8, |d| {
622686
let name: String = d.read_struct_field("name", 0, |d| Decodable::decode(d))?;
623687
let name_was_remapped: bool =
624688
d.read_struct_field("name_was_remapped", 1, |d| Decodable::decode(d))?;
@@ -657,6 +721,8 @@ impl Decodable for FileMap {
657721
})?;
658722
let multibyte_chars: Vec<MultiByteChar> =
659723
d.read_struct_field("multibyte_chars", 5, |d| Decodable::decode(d))?;
724+
let non_narrow_chars: Vec<NonNarrowChar> =
725+
d.read_struct_field("non_narrow_chars", 7, |d| Decodable::decode(d))?;
660726
Ok(FileMap {
661727
name,
662728
name_was_remapped,
@@ -671,7 +737,8 @@ impl Decodable for FileMap {
671737
src_hash,
672738
external_src: RefCell::new(ExternalSource::AbsentOk),
673739
lines: RefCell::new(lines),
674-
multibyte_chars: RefCell::new(multibyte_chars)
740+
multibyte_chars: RefCell::new(multibyte_chars),
741+
non_narrow_chars: RefCell::new(non_narrow_chars)
675742
})
676743
})
677744
}
@@ -709,6 +776,7 @@ impl FileMap {
709776
end_pos: Pos::from_usize(end_pos),
710777
lines: RefCell::new(Vec::new()),
711778
multibyte_chars: RefCell::new(Vec::new()),
779+
non_narrow_chars: RefCell::new(Vec::new()),
712780
}
713781
}
714782

@@ -798,6 +866,23 @@ impl FileMap {
798866
self.multibyte_chars.borrow_mut().push(mbc);
799867
}
800868

869+
pub fn record_width(&self, pos: BytePos, ch: char) {
870+
let width = match ch {
871+
'\t' | '\n' =>
872+
// Tabs will consume one column.
873+
// Make newlines take one column so that displayed spans can point them.
874+
1,
875+
ch =>
876+
// Assume control characters are zero width.
877+
// FIXME: How can we decide between `width` and `width_cjk`?
878+
unicode_width::UnicodeWidthChar::width(ch).unwrap_or(0),
879+
};
880+
// Only record non-narrow characters.
881+
if width != 1 {
882+
self.non_narrow_chars.borrow_mut().push(NonNarrowChar::new(pos, width));
883+
}
884+
}
885+
801886
pub fn is_real_file(&self) -> bool {
802887
!(self.name.starts_with("<") &&
803888
self.name.ends_with(">"))
@@ -944,7 +1029,9 @@ pub struct Loc {
9441029
/// The (1-based) line number
9451030
pub line: usize,
9461031
/// The (0-based) column offset
947-
pub col: CharPos
1032+
pub col: CharPos,
1033+
/// The (0-based) column offset when displayed
1034+
pub col_display: usize,
9481035
}
9491036

9501037
/// A source code location used as the result of lookup_char_pos_adj

src/test/ui/codemap_tests/unicode.stderr

+1-1
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@ error: invalid ABI: expected one of [cdecl, stdcall, fastcall, vectorcall, thisc
22
--> $DIR/unicode.rs:11:8
33
|
44
11 | extern "路濫狼á́́" fn foo() {}
5-
| ^^^^^^^^
5+
| ^^^^^^^^^
66

77
error: aborting due to previous error
88

0 commit comments

Comments
 (0)