Skip to content

Commit 68cb21c

Browse files
authored
[Feature] - Open links in new tab by expanding unescaped markdown links (#31)
* wip * add md_link.hbs * add replace all links * unit tests * test render * more unit tests * integration test
1 parent f92d2f9 commit 68cb21c

File tree

4 files changed

+177
-2
lines changed

4 files changed

+177
-2
lines changed

src/ai_pocket_reference.rs

+174-2
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@ use std::collections::HashMap;
99

1010
const AIPR_HEADER_TEMPLATE: &str = include_str!("./templates/header.hbs");
1111
const AIPR_FOOTER_HTML: &str = include_str!("./templates/footer.html");
12+
const MDLINK_TEMPLATE: &str = include_str!("./templates/md_link.hbs");
1213
const WORDS_PER_MINUTE: usize = 200;
1314

1415
#[derive(Default)]
@@ -53,6 +54,14 @@ impl Preprocessor for AIPRPreprocessor {
5354
}
5455

5556
fn replace_all(s: &str, num_words: usize) -> String {
57+
// First replace all AIPR links
58+
let aipr_replaced = replace_all_aipr_links(s, num_words);
59+
60+
// Then replace all Markdown links
61+
replace_all_md_links(&aipr_replaced)
62+
}
63+
64+
fn replace_all_aipr_links(s: &str, num_words: usize) -> String {
5665
// This implementation follows closely to the implementation of
5766
// mdbook::preprocess::links::replace_all.
5867
let mut previous_end_index = 0;
@@ -69,6 +78,35 @@ fn replace_all(s: &str, num_words: usize) -> String {
6978
replaced
7079
}
7180

81+
fn replace_all_md_links(s: &str) -> String {
82+
let mut previous_end_index = 0;
83+
let mut replaced = String::new();
84+
85+
for link in find_md_links(s) {
86+
// Add text up to the current link
87+
let prefix = &s[previous_end_index..link.start_index];
88+
replaced.push_str(prefix);
89+
90+
// Check if the prefix ends with a backslash or exclamation mark
91+
let last_char = prefix.chars().last();
92+
let is_escaped = last_char == Some('\\') || last_char == Some('!');
93+
94+
if is_escaped {
95+
// For escaped links, just add the original link text
96+
replaced.push_str(&s[link.start_index..link.end_index]);
97+
} else {
98+
// For normal links, render as HTML
99+
let new_content = link.render().unwrap();
100+
replaced.push_str(&new_content);
101+
}
102+
103+
previous_end_index = link.end_index;
104+
}
105+
106+
replaced.push_str(&s[previous_end_index..]);
107+
replaced
108+
}
109+
72110
#[derive(PartialEq, Debug, Clone)]
73111
enum AIPRLinkType {
74112
Header(AIPRHeaderSettings),
@@ -230,6 +268,86 @@ fn find_aipr_links(contents: &str) -> AIPRLinkIter<'_> {
230268
AIPRLinkIter(RE.captures_iter(contents))
231269
}
232270

271+
#[derive(PartialEq, Debug, Clone)]
272+
struct MDLink<'a> {
273+
start_index: usize,
274+
end_index: usize,
275+
text: &'a str,
276+
url: &'a str,
277+
}
278+
279+
impl<'a> MDLink<'a> {
280+
#[allow(dead_code)]
281+
fn from_capture(cap: Captures<'a>) -> Option<MDLink<'a>> {
282+
let md_tuple = match (cap.get(0), cap.get(1), cap.get(2)) {
283+
(_, Some(text_str), Some(url_str))
284+
if (url_str.as_str().starts_with("https://")
285+
|| url_str.as_str().starts_with("http://")) =>
286+
{
287+
Some((text_str.as_str(), url_str.as_str()))
288+
}
289+
_ => None,
290+
};
291+
292+
md_tuple.and_then(|(text, url)| {
293+
cap.get(0).map(|mat| MDLink {
294+
start_index: mat.start(),
295+
end_index: mat.end(),
296+
text,
297+
url,
298+
})
299+
})
300+
}
301+
302+
#[allow(dead_code)]
303+
fn render(&self) -> anyhow::Result<String> {
304+
let mut handlebars = Handlebars::new();
305+
306+
// register template
307+
handlebars
308+
.register_template_string("md_link_expansion", MDLINK_TEMPLATE)
309+
.unwrap();
310+
311+
// create data for rendering handlebar
312+
let mut data = Map::new();
313+
data.insert("text".to_string(), to_json(self.text));
314+
data.insert("url".to_string(), to_json(self.url));
315+
316+
// render
317+
let html_string = handlebars.render("md_link_expansion", &data)?;
318+
319+
Ok(html_string)
320+
}
321+
}
322+
323+
struct MDLinkIter<'a>(CaptureMatches<'a, 'a>);
324+
325+
impl<'a> Iterator for MDLinkIter<'a> {
326+
type Item = MDLink<'a>;
327+
fn next(&mut self) -> Option<MDLink<'a>> {
328+
for cap in &mut self.0 {
329+
if let Some(inc) = MDLink::from_capture(cap) {
330+
return Some(inc);
331+
}
332+
}
333+
None
334+
}
335+
}
336+
337+
fn find_md_links(contents: &str) -> MDLinkIter<'_> {
338+
static RE: Lazy<Regex> = Lazy::new(|| {
339+
Regex::new(
340+
r"(?x)
341+
\[([^\]]*(?:\\.[^\]]*)*)\] # link text in square brackets
342+
\(([^)]*(?:\\.[^)]*)*)\) # link URL in parentheses
343+
",
344+
)
345+
.unwrap()
346+
});
347+
348+
MDLinkIter(RE.captures_iter(contents))
349+
}
350+
233351
#[cfg(test)]
234352
mod tests {
235353
use super::*;
@@ -238,14 +356,15 @@ mod tests {
238356

239357
#[fixture]
240358
fn simple_book_content() -> String {
241-
"{{ #aipr_header }} {{ #aipr_header colab=nlp/lora.ipynb }} Some random text with and more text ..."
359+
"{{ #aipr_header }} {{ #aipr_header colab=nlp/lora.ipynb }} Some random [text with](https://fake.io) and more text ..."
242360
.to_string()
243361
}
244362

245363
#[rstest]
246364
fn test_find_links_no_author_links() -> Result<()> {
247365
let s = "Some random text without link...";
248366
assert!(find_aipr_links(s).collect::<Vec<_>>() == vec![]);
367+
assert!(find_md_links(s).collect::<Vec<_>>() == vec![]);
249368
Ok(())
250369
}
251370

@@ -259,8 +378,9 @@ mod tests {
259378

260379
#[rstest]
261380
fn test_find_links_unknown_link_type() -> Result<()> {
262-
let s = "Some random text with {{#my_author ar.rs}} and {{#auth}} {{baz}} {{#bar}}...";
381+
let s = "Some random \\[text with\\](test) {{#my_author ar.rs}} and {{#auth}} {{baz}} {{#bar}}...";
263382
assert!(find_aipr_links(s).collect::<Vec<_>>() == vec![]);
383+
assert!(find_md_links(s).collect::<Vec<_>>() == vec![]);
264384
Ok(())
265385
}
266386

@@ -405,4 +525,56 @@ mod tests {
405525

406526
Ok(())
407527
}
528+
529+
#[rstest]
530+
fn test_finds_md_link(simple_book_content: String) -> Result<()> {
531+
let res = find_md_links(&simple_book_content[..]).collect::<Vec<_>>();
532+
println!("\nOUTPUT: {res:?}\n");
533+
534+
assert_eq!(
535+
res,
536+
vec![MDLink {
537+
start_index: 71,
538+
end_index: 99,
539+
text: "text with",
540+
url: "https://fake.io"
541+
}]
542+
);
543+
544+
Ok(())
545+
}
546+
547+
#[rstest]
548+
fn test_md_link_render() -> Result<()> {
549+
let link = MDLink {
550+
start_index: 19,
551+
end_index: 58,
552+
text: "some text",
553+
url: "https://fake.io",
554+
};
555+
556+
let html_string = link.render()?;
557+
let expected = "<a href=\"https://fake.io\" target=\"_blank\" \
558+
rel=\"noopener noreferrer\">some text</a>\n";
559+
560+
assert_eq!(html_string, expected);
561+
562+
Ok(())
563+
}
564+
565+
#[rstest]
566+
fn test_replace_all_md_links() -> Result<()> {
567+
let content = "This is [good link](https://good.io), \
568+
whereas ![this](https://not-covered.io), and \
569+
neither is \\[this\\](http://not-covered.io).";
570+
571+
let new_content = replace_all_md_links(content);
572+
let expected = "This is <a href=\"https://good.io\" target=\"_blank\" \
573+
rel=\"noopener noreferrer\">good link</a>\n, whereas ![this](https://not-covered.io), \
574+
and neither is \\[this\\](http://not-covered.io).";
575+
576+
assert_eq!(new_content, expected);
577+
578+
Ok(())
579+
}
408580
}

src/templates/md_link.hbs

+1
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
<a href="{{url}}" target="_blank" rel="noopener noreferrer">{{text}}</a>

test_book/src/chapter_1/index.md

+2
Original file line numberDiff line numberDiff line change
@@ -3,3 +3,5 @@
33
{{ #aipr_header }}
44

55
This has light emphasis and bold emphasis.
6+
7+
[Vector Institute](https://vectorinstitute.ai) is known for doing great work!

0 commit comments

Comments
 (0)