Skip to content

Commit e33b128

Browse files
committed
Auto merge of #26816 - nrc:zero-codemap, r=@jroesch
See commits for details
2 parents 247a0d1 + f47d20a commit e33b128

File tree

8 files changed

+163
-113
lines changed

8 files changed

+163
-113
lines changed

src/librustdoc/clean/mod.rs

+4
Original file line numberDiff line numberDiff line change
@@ -1947,6 +1947,10 @@ impl Span {
19471947

19481948
impl Clean<Span> for syntax::codemap::Span {
19491949
fn clean(&self, cx: &DocContext) -> Span {
1950+
if *self == DUMMY_SP {
1951+
return Span::empty();
1952+
}
1953+
19501954
let cm = cx.sess().codemap();
19511955
let filename = cm.span_to_filename(*self);
19521956
let lo = cm.lookup_char_pos(self.lo);

src/libsyntax/codemap.rs

+107-87
Original file line numberDiff line numberDiff line change
@@ -115,6 +115,10 @@ impl Sub for CharPos {
115115
/// are *absolute* positions from the beginning of the codemap, not positions
116116
/// relative to FileMaps. Methods on the CodeMap can be used to relate spans back
117117
/// to the original source.
118+
/// You must be careful if the span crosses more than one file - you will not be
119+
/// able to use many of the functions on spans in codemap and you cannot assume
120+
/// that the length of the span = hi - lo; there may be space in the BytePos
121+
/// range between files.
118122
#[derive(Clone, Copy, Hash)]
119123
pub struct Span {
120124
pub lo: BytePos,
@@ -339,7 +343,7 @@ pub struct MultiByteChar {
339343
pub bytes: usize,
340344
}
341345

342-
/// A single source in the CodeMap
346+
/// A single source in the CodeMap.
343347
pub struct FileMap {
344348
/// The name of the file that the source came from. Source that doesn't
345349
/// originate from files has names between angle brackets by convention,
@@ -508,6 +512,9 @@ impl FileMap {
508512
lines.get(line_number).map(|&line| {
509513
let begin: BytePos = line - self.start_pos;
510514
let begin = begin.to_usize();
515+
// We can't use `lines.get(line_number+1)` because we might
516+
// be parsing when we call this function and thus the current
517+
// line is the last one we have line info for.
511518
let slice = &src[begin..];
512519
match slice.find('\n') {
513520
Some(e) => &slice[..e],
@@ -598,27 +605,27 @@ impl CodeMap {
598605
Ok(self.new_filemap(path.to_str().unwrap().to_string(), src))
599606
}
600607

608+
fn next_start_pos(&self) -> usize {
609+
let files = self.files.borrow();
610+
match files.last() {
611+
None => 0,
612+
// Add one so there is some space between files. This lets us distinguish
613+
// positions in the codemap, even in the presence of zero-length files.
614+
Some(last) => last.end_pos.to_usize() + 1,
615+
}
616+
}
617+
618+
/// Creates a new filemap without setting its line information. If you don't
619+
/// intend to set the line information yourself, you should use `new_filemap_and_lines`.
601620
pub fn new_filemap(&self, filename: FileName, mut src: String) -> Rc<FileMap> {
621+
let start_pos = self.next_start_pos();
602622
let mut files = self.files.borrow_mut();
603-
let start_pos = match files.last() {
604-
None => 0,
605-
Some(last) => last.end_pos.to_usize(),
606-
};
607623

608624
// Remove utf-8 BOM if any.
609625
if src.starts_with("\u{feff}") {
610626
src.drain(..3);
611627
}
612628

613-
// Append '\n' in case it's not already there.
614-
// This is a workaround to prevent CodeMap.lookup_filemap_idx from
615-
// accidentally overflowing into the next filemap in case the last byte
616-
// of span is also the last byte of filemap, which leads to incorrect
617-
// results from CodeMap.span_to_*.
618-
if !src.is_empty() && !src.ends_with("\n") {
619-
src.push('\n');
620-
}
621-
622629
let end_pos = start_pos + src.len();
623630

624631
let filemap = Rc::new(FileMap {
@@ -635,6 +642,21 @@ impl CodeMap {
635642
filemap
636643
}
637644

645+
/// Creates a new filemap and sets its line information.
646+
pub fn new_filemap_and_lines(&self, filename: &str, src: &str) -> Rc<FileMap> {
647+
let fm = self.new_filemap(filename.to_string(), src.to_owned());
648+
let mut byte_pos: u32 = 0;
649+
for line in src.lines() {
650+
// register the start of this line
651+
fm.next_line(BytePos(byte_pos));
652+
653+
// update byte_pos to include this line and the \n at the end
654+
byte_pos += line.len() as u32 + 1;
655+
}
656+
fm
657+
}
658+
659+
638660
/// Allocates a new FileMap representing a source file from an external
639661
/// crate. The source code of such an "imported filemap" is not available,
640662
/// but we still know enough to generate accurate debuginfo location
@@ -645,11 +667,8 @@ impl CodeMap {
645667
mut file_local_lines: Vec<BytePos>,
646668
mut file_local_multibyte_chars: Vec<MultiByteChar>)
647669
-> Rc<FileMap> {
670+
let start_pos = self.next_start_pos();
648671
let mut files = self.files.borrow_mut();
649-
let start_pos = match files.last() {
650-
None => 0,
651-
Some(last) => last.end_pos.to_usize(),
652-
};
653672

654673
let end_pos = Pos::from_usize(start_pos + source_len);
655674
let start_pos = Pos::from_usize(start_pos);
@@ -686,39 +705,61 @@ impl CodeMap {
686705

687706
/// Look up source information about a BytePos.
688707
pub fn lookup_char_pos(&self, pos: BytePos) -> Loc {
689-
let FileMapAndLine {fm: f, line: a} = self.lookup_line(pos);
690-
let line = a + 1; // Line numbers start at 1
691708
let chpos = self.bytepos_to_file_charpos(pos);
692-
let linebpos = (*f.lines.borrow())[a];
693-
let linechpos = self.bytepos_to_file_charpos(linebpos);
694-
debug!("byte pos {:?} is on the line at byte pos {:?}",
695-
pos, linebpos);
696-
debug!("char pos {:?} is on the line at char pos {:?}",
697-
chpos, linechpos);
698-
debug!("byte is on line: {}", line);
699-
assert!(chpos >= linechpos);
700-
Loc {
701-
file: f,
702-
line: line,
703-
col: chpos - linechpos
709+
match self.lookup_line(pos) {
710+
Ok(FileMapAndLine { fm: f, line: a }) => {
711+
let line = a + 1; // Line numbers start at 1
712+
let linebpos = (*f.lines.borrow())[a];
713+
let linechpos = self.bytepos_to_file_charpos(linebpos);
714+
debug!("byte pos {:?} is on the line at byte pos {:?}",
715+
pos, linebpos);
716+
debug!("char pos {:?} is on the line at char pos {:?}",
717+
chpos, linechpos);
718+
debug!("byte is on line: {}", line);
719+
assert!(chpos >= linechpos);
720+
Loc {
721+
file: f,
722+
line: line,
723+
col: chpos - linechpos,
724+
}
725+
}
726+
Err(f) => {
727+
Loc {
728+
file: f,
729+
line: 0,
730+
col: chpos,
731+
}
732+
}
704733
}
705734
}
706735

707-
fn lookup_line(&self, pos: BytePos) -> FileMapAndLine {
736+
// If the relevant filemap is empty, we don't return a line number.
737+
fn lookup_line(&self, pos: BytePos) -> Result<FileMapAndLine, Rc<FileMap>> {
708738
let idx = self.lookup_filemap_idx(pos);
709739

710740
let files = self.files.borrow();
711741
let f = (*files)[idx].clone();
742+
743+
let len = f.lines.borrow().len();
744+
if len == 0 {
745+
return Err(f);
746+
}
747+
712748
let mut a = 0;
713749
{
714750
let lines = f.lines.borrow();
715751
let mut b = lines.len();
716752
while b - a > 1 {
717753
let m = (a + b) / 2;
718-
if (*lines)[m] > pos { b = m; } else { a = m; }
754+
if (*lines)[m] > pos {
755+
b = m;
756+
} else {
757+
a = m;
758+
}
719759
}
760+
assert!(a <= lines.len());
720761
}
721-
FileMapAndLine {fm: f, line: a}
762+
Ok(FileMapAndLine { fm: f, line: a })
722763
}
723764

724765
pub fn lookup_char_pos_adj(&self, pos: BytePos) -> LocWithOpt {
@@ -853,7 +894,7 @@ impl CodeMap {
853894
FileMapAndBytePos {fm: fm, pos: offset}
854895
}
855896

856-
/// Converts an absolute BytePos to a CharPos relative to the filemap and above.
897+
/// Converts an absolute BytePos to a CharPos relative to the filemap.
857898
pub fn bytepos_to_file_charpos(&self, bpos: BytePos) -> CharPos {
858899
let idx = self.lookup_filemap_idx(bpos);
859900
let files = self.files.borrow();
@@ -880,12 +921,15 @@ impl CodeMap {
880921
CharPos(bpos.to_usize() - map.start_pos.to_usize() - total_extra_bytes)
881922
}
882923

924+
// Return the index of the filemap (in self.files) which contains pos.
883925
fn lookup_filemap_idx(&self, pos: BytePos) -> usize {
884926
let files = self.files.borrow();
885927
let files = &*files;
886-
let len = files.len();
928+
let count = files.len();
929+
930+
// Binary search for the filemap.
887931
let mut a = 0;
888-
let mut b = len;
932+
let mut b = count;
889933
while b - a > 1 {
890934
let m = (a + b) / 2;
891935
if files[m].start_pos > pos {
@@ -894,26 +938,8 @@ impl CodeMap {
894938
a = m;
895939
}
896940
}
897-
// There can be filemaps with length 0. These have the same start_pos as
898-
// the previous filemap, but are not the filemaps we want (because they
899-
// are length 0, they cannot contain what we are looking for). So,
900-
// rewind until we find a useful filemap.
901-
loop {
902-
let lines = files[a].lines.borrow();
903-
let lines = lines;
904-
if !lines.is_empty() {
905-
break;
906-
}
907-
if a == 0 {
908-
panic!("position {} does not resolve to a source location",
909-
pos.to_usize());
910-
}
911-
a -= 1;
912-
}
913-
if a >= len {
914-
panic!("position {} does not resolve to a source location",
915-
pos.to_usize())
916-
}
941+
942+
assert!(a < count, "position {} does not resolve to a source location", pos.to_usize());
917943

918944
return a;
919945
}
@@ -1027,10 +1053,13 @@ mod tests {
10271053
let fm = cm.new_filemap("blork.rs".to_string(),
10281054
"first line.\nsecond line".to_string());
10291055
fm.next_line(BytePos(0));
1056+
// Test we can get lines with partial line info.
10301057
assert_eq!(fm.get_line(0), Some("first line."));
1031-
// TESTING BROKEN BEHAVIOR:
1058+
// TESTING BROKEN BEHAVIOR: line break declared before actual line break.
10321059
fm.next_line(BytePos(10));
10331060
assert_eq!(fm.get_line(1), Some("."));
1061+
fm.next_line(BytePos(12));
1062+
assert_eq!(fm.get_line(2), Some("second line"));
10341063
}
10351064

10361065
#[test]
@@ -1056,9 +1085,9 @@ mod tests {
10561085

10571086
fm1.next_line(BytePos(0));
10581087
fm1.next_line(BytePos(12));
1059-
fm2.next_line(BytePos(24));
1060-
fm3.next_line(BytePos(24));
1061-
fm3.next_line(BytePos(34));
1088+
fm2.next_line(fm2.start_pos);
1089+
fm3.next_line(fm3.start_pos);
1090+
fm3.next_line(fm3.start_pos + BytePos(12));
10621091

10631092
cm
10641093
}
@@ -1068,11 +1097,15 @@ mod tests {
10681097
// Test lookup_byte_offset
10691098
let cm = init_code_map();
10701099

1071-
let fmabp1 = cm.lookup_byte_offset(BytePos(22));
1100+
let fmabp1 = cm.lookup_byte_offset(BytePos(23));
10721101
assert_eq!(fmabp1.fm.name, "blork.rs");
1073-
assert_eq!(fmabp1.pos, BytePos(22));
1102+
assert_eq!(fmabp1.pos, BytePos(23));
1103+
1104+
let fmabp1 = cm.lookup_byte_offset(BytePos(24));
1105+
assert_eq!(fmabp1.fm.name, "empty.rs");
1106+
assert_eq!(fmabp1.pos, BytePos(0));
10741107

1075-
let fmabp2 = cm.lookup_byte_offset(BytePos(24));
1108+
let fmabp2 = cm.lookup_byte_offset(BytePos(25));
10761109
assert_eq!(fmabp2.fm.name, "blork2.rs");
10771110
assert_eq!(fmabp2.pos, BytePos(0));
10781111
}
@@ -1085,7 +1118,7 @@ mod tests {
10851118
let cp1 = cm.bytepos_to_file_charpos(BytePos(22));
10861119
assert_eq!(cp1, CharPos(22));
10871120

1088-
let cp2 = cm.bytepos_to_file_charpos(BytePos(24));
1121+
let cp2 = cm.bytepos_to_file_charpos(BytePos(25));
10891122
assert_eq!(cp2, CharPos(0));
10901123
}
10911124

@@ -1099,7 +1132,7 @@ mod tests {
10991132
assert_eq!(loc1.line, 2);
11001133
assert_eq!(loc1.col, CharPos(10));
11011134

1102-
let loc2 = cm.lookup_char_pos(BytePos(24));
1135+
let loc2 = cm.lookup_char_pos(BytePos(25));
11031136
assert_eq!(loc2.file.name, "blork2.rs");
11041137
assert_eq!(loc2.line, 1);
11051138
assert_eq!(loc2.col, CharPos(0));
@@ -1115,18 +1148,18 @@ mod tests {
11151148
"first line€€.\n€ second line".to_string());
11161149

11171150
fm1.next_line(BytePos(0));
1118-
fm1.next_line(BytePos(22));
1119-
fm2.next_line(BytePos(40));
1120-
fm2.next_line(BytePos(58));
1151+
fm1.next_line(BytePos(28));
1152+
fm2.next_line(fm2.start_pos);
1153+
fm2.next_line(fm2.start_pos + BytePos(20));
11211154

11221155
fm1.record_multibyte_char(BytePos(3), 3);
11231156
fm1.record_multibyte_char(BytePos(9), 3);
11241157
fm1.record_multibyte_char(BytePos(12), 3);
11251158
fm1.record_multibyte_char(BytePos(15), 3);
11261159
fm1.record_multibyte_char(BytePos(18), 3);
1127-
fm2.record_multibyte_char(BytePos(50), 3);
1128-
fm2.record_multibyte_char(BytePos(53), 3);
1129-
fm2.record_multibyte_char(BytePos(58), 3);
1160+
fm2.record_multibyte_char(fm2.start_pos + BytePos(10), 3);
1161+
fm2.record_multibyte_char(fm2.start_pos + BytePos(13), 3);
1162+
fm2.record_multibyte_char(fm2.start_pos + BytePos(18), 3);
11301163

11311164
cm
11321165
}
@@ -1172,27 +1205,14 @@ mod tests {
11721205
Span { lo: BytePos(left_index), hi: BytePos(right_index + 1), expn_id: NO_EXPANSION }
11731206
}
11741207

1175-
fn new_filemap_and_lines(cm: &CodeMap, filename: &str, input: &str) -> Rc<FileMap> {
1176-
let fm = cm.new_filemap(filename.to_string(), input.to_string());
1177-
let mut byte_pos: u32 = 0;
1178-
for line in input.lines() {
1179-
// register the start of this line
1180-
fm.next_line(BytePos(byte_pos));
1181-
1182-
// update byte_pos to include this line and the \n at the end
1183-
byte_pos += line.len() as u32 + 1;
1184-
}
1185-
fm
1186-
}
1187-
11881208
/// Test span_to_snippet and span_to_lines for a span covering 3
11891209
/// lines in the middle of a file.
11901210
#[test]
11911211
fn span_to_snippet_and_lines_spanning_multiple_lines() {
11921212
let cm = CodeMap::new();
11931213
let inputtext = "aaaaa\nbbbbBB\nCCC\nDDDDDddddd\neee\n";
11941214
let selection = " \n ^~\n~~~\n~~~~~ \n \n";
1195-
new_filemap_and_lines(&cm, "blork.rs", inputtext);
1215+
cm.new_filemap_and_lines("blork.rs", inputtext);
11961216
let span = span_from_selection(inputtext, selection);
11971217

11981218
// check that we are extracting the text we thought we were extracting

0 commit comments

Comments
 (0)