Skip to content

Commit feaf5cf

Browse files
committed
Use a line_offset vector to track offsets for inline sourcepos
1 parent 9f4d391 commit feaf5cf

File tree

8 files changed

+149
-44
lines changed

8 files changed

+149
-44
lines changed

Diff for: src/cm.rs

+7-7
Original file line number | Diff line number | Diff line change
@@ -544,13 +544,13 @@ impl<'a, 'o, 'c> CommonMarkFormatter<'a, 'o, 'c> {
544544
let info = ncb.info.as_bytes();
545545
let literal = ncb.literal.as_bytes();
546546

547-
if info.is_empty()
548-
&& (literal.len() > 2
549-
&& !isspace(literal[0])
550-
&& !(isspace(literal[literal.len() - 1])
551-
&& isspace(literal[literal.len() - 2])))
552-
&& !first_in_list_item
553-
&& !self.options.render.prefer_fenced
547+
#[allow(clippy::len_zero)]
548+
if !(info.len() > 0
549+
|| literal.len() <= 2
550+
|| isspace(literal[0])
551+
|| first_in_list_item
552+
|| self.options.render.prefer_fenced
553+
|| isspace(literal[literal.len() - 1]) && isspace(literal[literal.len() - 2]))
554554
{
555555
write!(self, " ").unwrap();
556556
write!(self.prefix, " ").unwrap();

Diff for: src/nodes.rs

+2
Original file line number | Diff line number | Diff line change
@@ -534,6 +534,7 @@ pub struct Ast {
534534
pub(crate) open: bool,
535535
pub(crate) last_line_blank: bool,
536536
pub(crate) table_visited: bool,
537+
pub(crate) line_offsets: Vec<usize>,
537538
}
538539

539540
/// Represents the position in the source Markdown this node was rendered from.
@@ -609,6 +610,7 @@ impl Ast {
609610
open: true,
610611
last_line_blank: false,
611612
table_visited: false,
613+
line_offsets: Vec::with_capacity(0),
612614
}
613615
}
614616
}

Diff for: src/parser/autolink.rs

+5-8
Original file line number | Diff line number | Diff line change
@@ -41,14 +41,11 @@ pub(crate) fn process_autolinks<'a>(
4141
}
4242
}
4343

44-
match contents[i] {
45-
b'@' => {
46-
post_org = email_match(arena, contents, i, relaxed_autolinks);
47-
if post_org.is_some() {
48-
break;
49-
}
44+
if contents[i] == b'@' {
45+
post_org = email_match(arena, contents, i, relaxed_autolinks);
46+
if post_org.is_some() {
47+
break;
5048
}
51-
_ => (),
5249
}
5350
i += 1;
5451
}
@@ -161,7 +158,7 @@ fn check_domain(data: &[u8], allow_short: bool) -> Option<usize> {
161158
}
162159

163160
fn is_valid_hostchar(ch: char) -> bool {
164-
!ch.is_whitespace() && !(ch.is_punctuation() || ch.is_symbol())
161+
!(ch.is_whitespace() || ch.is_punctuation() || ch.is_symbol())
165162
}
166163

167164
fn autolink_delim(data: &[u8], mut link_end: usize, relaxed_autolinks: bool) -> usize {

Diff for: src/parser/inlines.rs

+13-7
Original file line number | Diff line number | Diff line change
@@ -31,8 +31,8 @@ pub struct Subject<'a: 'd, 'r, 'o, 'c, 'd, 'i> {
3131
pub input: &'i [u8],
3232
line: usize,
3333
pub pos: usize,
34-
block_offset: usize,
3534
column_offset: isize,
35+
line_offset: usize,
3636
flags: Flags,
3737
pub refmap: &'r mut RefMap,
3838
delimiter_arena: &'d Arena<Delimiter<'a, 'd>>,
@@ -116,7 +116,6 @@ impl<'a, 'r, 'o, 'c, 'd, 'i> Subject<'a, 'r, 'o, 'c, 'd, 'i> {
116116
options: &'o Options<'c>,
117117
input: &'i [u8],
118118
line: usize,
119-
block_offset: usize,
120119
refmap: &'r mut RefMap,
121120
delimiter_arena: &'d Arena<Delimiter<'a, 'd>>,
122121
) -> Self {
@@ -126,8 +125,8 @@ impl<'a, 'r, 'o, 'c, 'd, 'i> Subject<'a, 'r, 'o, 'c, 'd, 'i> {
126125
input,
127126
line,
128127
pos: 0,
129-
block_offset,
130128
column_offset: 0,
129+
line_offset: 0,
131130
flags: Flags::default(),
132131
refmap,
133132
delimiter_arena,
@@ -182,6 +181,11 @@ impl<'a, 'r, 'o, 'c, 'd, 'i> Subject<'a, 'r, 'o, 'c, 'd, 'i> {
182181
None => return false,
183182
Some(ch) => *ch as char,
184183
};
184+
185+
let node_ast = node.data.borrow();
186+
let adjusted_line = self.line - node_ast.sourcepos.start.line;
187+
self.line_offset = node_ast.line_offsets[adjusted_line];
188+
185189
let new_inl: Option<&'a AstNode<'a>> = match c {
186190
'\0' => return false,
187191
'\r' | '\n' => Some(self.handle_newline()),
@@ -1604,7 +1608,7 @@ impl<'a, 'r, 'o, 'c, 'd, 'i> Subject<'a, 'r, 'o, 'c, 'd, 'i> {
16041608
inl.data.borrow_mut().sourcepos.start.column =
16051609
bracket_inl_text.data.borrow().sourcepos.start.column;
16061610
inl.data.borrow_mut().sourcepos.end.column = usize::try_from(
1607-
self.pos as isize + self.column_offset + self.block_offset as isize,
1611+
self.pos as isize + self.column_offset + self.line_offset as isize,
16081612
)
16091613
.unwrap();
16101614
bracket_inl_text.insert_before(inl);
@@ -1655,7 +1659,7 @@ impl<'a, 'r, 'o, 'c, 'd, 'i> Subject<'a, 'r, 'o, 'c, 'd, 'i> {
16551659
.sourcepos
16561660
.start;
16571661
inl.data.borrow_mut().sourcepos.end.column =
1658-
usize::try_from(self.pos as isize + self.column_offset + self.block_offset as isize)
1662+
usize::try_from(self.pos as isize + self.column_offset + self.line_offset as isize)
16591663
.unwrap();
16601664

16611665
self.brackets[brackets_len - 1].inl_text.insert_before(inl);
@@ -1847,8 +1851,8 @@ impl<'a, 'r, 'o, 'c, 'd, 'i> Subject<'a, 'r, 'o, 'c, 'd, 'i> {
18471851
end_column: usize,
18481852
) -> &'a AstNode<'a> {
18491853
let start_column =
1850-
start_column as isize + 1 + self.column_offset + self.block_offset as isize;
1851-
let end_column = end_column as isize + 1 + self.column_offset + self.block_offset as isize;
1854+
start_column as isize + 1 + self.column_offset + self.line_offset as isize;
1855+
let end_column = end_column as isize + 1 + self.column_offset + self.line_offset as isize;
18521856

18531857
let ast = Ast {
18541858
value,
@@ -1864,6 +1868,7 @@ impl<'a, 'r, 'o, 'c, 'd, 'i> Subject<'a, 'r, 'o, 'c, 'd, 'i> {
18641868
open: false,
18651869
last_line_blank: false,
18661870
table_visited: false,
1871+
line_offsets: Vec::with_capacity(0),
18671872
};
18681873
self.arena.alloc(Node::new(RefCell::new(ast)))
18691874
}
@@ -1972,6 +1977,7 @@ pub fn make_inline<'a>(
19721977
open: false,
19731978
last_line_blank: false,
19741979
table_visited: false,
1980+
line_offsets: Vec::with_capacity(0),
19751981
};
19761982
arena.alloc(Node::new(RefCell::new(ast)))
19771983
}

Diff for: src/parser/mod.rs

+6-2
Original file line number | Diff line number | Diff line change
@@ -67,6 +67,7 @@ pub fn parse_document<'a>(
6767
open: true,
6868
last_line_blank: false,
6969
table_visited: false,
70+
line_offsets: Vec::with_capacity(0),
7071
})));
7172
let mut parser = Parser::new(arena, root, options);
7273
let mut linebuf = Vec::with_capacity(buffer.len());
@@ -1998,6 +1999,11 @@ impl<'a, 'o, 'c: 'o> Parser<'a, 'o, 'c> {
19981999
}
19992000
}
20002001
if self.offset < line.len() {
2002+
// since whitespace is stripped off the beginning of lines, we need to keep
2003+
// track of how much was stripped off. This allows us to properly calculate
2004+
// inline sourcepos during inline processing.
2005+
ast.line_offsets.push(self.offset);
2006+
20012007
ast.content
20022008
.push_str(str::from_utf8(&line[self.offset..]).unwrap());
20032009
}
@@ -2185,7 +2191,6 @@ impl<'a, 'o, 'c: 'o> Parser<'a, 'o, 'c> {
21852191
self.options,
21862192
content,
21872193
node_data.sourcepos.start.line,
2188-
node_data.sourcepos.start.column - 1 + node_data.internal_offset,
21892194
&mut self.refmap,
21902195
&delimiter_arena,
21912196
);
@@ -2439,7 +2444,6 @@ impl<'a, 'o, 'c: 'o> Parser<'a, 'o, 'c> {
24392444
self.options,
24402445
content,
24412446
0, // XXX -1 in upstream; never used?
2442-
0,
24432447
&mut self.refmap,
24442448
&delimiter_arena,
24452449
);

Diff for: src/parser/table.rs

+14-10
Original file line number | Diff line number | Diff line change
@@ -116,6 +116,10 @@ fn try_opening_header<'a>(
116116
start.column_add((cell.end_offset - header_row.paragraph_offset) as isize);
117117
ast.internal_offset = cell.internal_offset;
118118
ast.content.clone_from(&cell.content);
119+
ast.line_offsets.push(
120+
start.column + cell.start_offset - 1 + cell.internal_offset
121+
- header_row.paragraph_offset,
122+
);
119123

120124
i += 1;
121125
}
@@ -172,6 +176,9 @@ fn try_opening_row<'a>(
172176
cell_ast.internal_offset = cell.internal_offset;
173177
cell_ast.sourcepos.end.column = sourcepos.start.column + cell.end_offset;
174178
cell_ast.content.clone_from(&cell.content);
179+
cell_ast
180+
.line_offsets
181+
.push(sourcepos.start.column + cell.start_offset - 1 + cell.internal_offset);
175182

176183
last_column = cell_ast.sourcepos.end.column;
177184

@@ -295,16 +302,13 @@ fn try_inserting_table_header_paragraph<'a>(
295302
let mut paragraph = Ast::new(NodeValue::Paragraph, start);
296303
paragraph.sourcepos.end.line = start.line + newlines - 1;
297304

298-
// XXX We don't have the last_line_length to go on by this point,
299-
// so we have no idea what the end column should be.
300-
// We can't track it in row() like we do paragraph_offset, because
301-
// we've already discarded the leading whitespace for that line.
302-
// This is hard to avoid with this backtracking approach to
303-
// creating the pre-table paragraph — we're doing the work of
304-
// finalize() here, but without the parser state at that time.
305-
// Approximate by just counting the line length as it is and adding
306-
// to the start column.
307-
paragraph.sourcepos.end.column = start.column - 1
305+
// copy over the line offsets related to the paragraph
306+
for n in 0..newlines {
307+
paragraph.line_offsets.push(container_ast.line_offsets[n]);
308+
}
309+
310+
let last_line_offset = *paragraph.line_offsets.last().unwrap_or(&0);
311+
paragraph.sourcepos.end.column = last_line_offset
308312
+ preface
309313
.iter()
310314
.rev()

Diff for: src/tests/core.rs

+100-4
Original file line number | Diff line number | Diff line change
@@ -529,8 +529,6 @@ fn link_sourcepos_newline() {
529529
);
530530
}
531531

532-
// Ignored per https://github.com/kivikakk/comrak/pull/439#issuecomment-2225129960.
533-
#[ignore]
534532
#[test]
535533
fn link_sourcepos_truffle() {
536534
assert_ast_match!(
@@ -577,8 +575,6 @@ fn link_sourcepos_truffle_twist() {
577575
);
578576
}
579577

580-
// Ignored per https://github.com/kivikakk/comrak/pull/439#issuecomment-2225129960.
581-
#[ignore]
582578
#[test]
583579
fn link_sourcepos_truffle_bergamot() {
584580
assert_ast_match!(
@@ -601,3 +597,103 @@ fn link_sourcepos_truffle_bergamot() {
601597
])
602598
);
603599
}
600+
601+
#[test]
602+
fn link_sourcepos_inline_paragraph_multiline() {
603+
assert_ast_match!(
604+
[],
605+
" A\n"
606+
" B\n",
607+
(document (1:1-2:4) [
608+
(paragraph (1:3-2:4) [
609+
(text (1:3-1:3) "A")
610+
(softbreak (1:4-1:4))
611+
(text (2:4-2:4) "B")
612+
])
613+
])
614+
);
615+
}
616+
617+
#[test]
618+
fn link_sourcepos_inline_listitem_multiline() {
619+
assert_ast_match!(
620+
[],
621+
"- A\n"
622+
"B\n",
623+
(document (1:1-2:1) [
624+
(list (1:1-2:1) [
625+
(item (1:1-2:1) [
626+
(paragraph (1:3-2:1) [
627+
(text (1:3-1:3) "A")
628+
(softbreak (1:4-1:4))
629+
(text (2:1-2:1) "B")
630+
])
631+
])
632+
])
633+
])
634+
);
635+
}
636+
637+
#[test]
638+
fn link_sourcepos_inline_listitem_multiline_2() {
639+
assert_ast_match!(
640+
[],
641+
"- A\n"
642+
" B\n"
643+
"- C\n"
644+
" D",
645+
(document (1:1-4:2) [
646+
(list (1:1-4:2) [
647+
(item (1:1-2:4) [
648+
(paragraph (1:3-2:4) [
649+
(text (1:3-1:3) "A")
650+
(softbreak (1:4-1:4))
651+
(text (2:4-2:4) "B")
652+
])
653+
])
654+
(item (3:1-4:2) [
655+
(paragraph (3:4-4:2) [
656+
(text (3:4-3:4) "C")
657+
(softbreak (3:5-3:5))
658+
(text (4:2-4:2) "D")
659+
])
660+
])
661+
])
662+
])
663+
);
664+
}
665+
666+
#[test]
667+
fn link_sourcepos_inline_double_emphasis_1() {
668+
assert_ast_match!(
669+
[],
670+
"_**this**_\n",
671+
(document (1:1-1:10) [
672+
(paragraph (1:1-1:10) [
673+
(emph (1:1-1:10) [
674+
(strong (1:2-1:9) [
675+
(text (1:4-1:7) "this")
676+
])
677+
])
678+
])
679+
])
680+
);
681+
}
682+
683+
#[ignore]
684+
#[test]
685+
fn link_sourcepos_inline_double_emphasis_2() {
686+
assert_ast_match!(
687+
[],
688+
"___this___\n",
689+
(document (1:1-1:10) [
690+
(paragraph (1:1-1:10) [
691+
(emph (1:1-1:10) [
692+
(strong (1:2-1:9) [
693+
(text (1:4-1:7) "this")
694+
])
695+
])
696+
])
697+
])
698+
);
699+
}

Diff for: src/tests/table.rs

+2-6
Original file line number | Diff line number | Diff line change
@@ -192,14 +192,10 @@ fn sourcepos_with_preceding_para_offset() {
192192
" | c | d |\n"
193193
,
194194
(document (1:1-5:10) [
195-
196-
// XXX This should be 1:2-2:5; see
197-
// crate::parser::table::try_inserting_table_header_paragraph.
198-
(paragraph (1:2-2:4) [
199-
195+
(paragraph (1:2-2:5) [
200196
(text (1:2-1:4) "123")
201197
(softbreak (1:5-1:5))
202-
(text (2:2-2:4) "456")
198+
(text (2:3-2:5) "456")
203199
])
204200
(table (3:2-5:10) [
205201
(table_row (3:2-3:10) [

0 commit comments

Comments
 (0)