@@ -9,6 +9,7 @@ use std::collections::HashMap;
9
9
10
10
const AIPR_HEADER_TEMPLATE : & str = include_str ! ( "./templates/header.hbs" ) ;
11
11
const AIPR_FOOTER_HTML : & str = include_str ! ( "./templates/footer.html" ) ;
12
+ const MDLINK_TEMPLATE : & str = include_str ! ( "./templates/md_link.hbs" ) ;
12
13
const WORDS_PER_MINUTE : usize = 200 ;
13
14
14
15
#[ derive( Default ) ]
@@ -53,6 +54,14 @@ impl Preprocessor for AIPRPreprocessor {
53
54
}
54
55
55
56
fn replace_all ( s : & str , num_words : usize ) -> String {
57
+ // First replace all AIPR links
58
+ let aipr_replaced = replace_all_aipr_links ( s, num_words) ;
59
+
60
+ // Then replace all Markdown links
61
+ replace_all_md_links ( & aipr_replaced)
62
+ }
63
+
64
+ fn replace_all_aipr_links ( s : & str , num_words : usize ) -> String {
56
65
// This implementation follows closely to the implementation of
57
66
// mdbook::preprocess::links::replace_all.
58
67
let mut previous_end_index = 0 ;
@@ -69,6 +78,35 @@ fn replace_all(s: &str, num_words: usize) -> String {
69
78
replaced
70
79
}
71
80
81
+ fn replace_all_md_links ( s : & str ) -> String {
82
+ let mut previous_end_index = 0 ;
83
+ let mut replaced = String :: new ( ) ;
84
+
85
+ for link in find_md_links ( s) {
86
+ // Add text up to the current link
87
+ let prefix = & s[ previous_end_index..link. start_index ] ;
88
+ replaced. push_str ( prefix) ;
89
+
90
+ // Check if the prefix ends with a backslash or exclamation mark
91
+ let last_char = prefix. chars ( ) . last ( ) ;
92
+ let is_escaped = last_char == Some ( '\\' ) || last_char == Some ( '!' ) ;
93
+
94
+ if is_escaped {
95
+ // For escaped links, just add the original link text
96
+ replaced. push_str ( & s[ link. start_index ..link. end_index ] ) ;
97
+ } else {
98
+ // For normal links, render as HTML
99
+ let new_content = link. render ( ) . unwrap ( ) ;
100
+ replaced. push_str ( & new_content) ;
101
+ }
102
+
103
+ previous_end_index = link. end_index ;
104
+ }
105
+
106
+ replaced. push_str ( & s[ previous_end_index..] ) ;
107
+ replaced
108
+ }
109
+
72
110
#[ derive( PartialEq , Debug , Clone ) ]
73
111
enum AIPRLinkType {
74
112
Header ( AIPRHeaderSettings ) ,
@@ -230,6 +268,86 @@ fn find_aipr_links(contents: &str) -> AIPRLinkIter<'_> {
230
268
AIPRLinkIter ( RE . captures_iter ( contents) )
231
269
}
232
270
271
/// A Markdown link of the form `[text](url)` located inside a larger string.
///
/// `text` and `url` borrow from the string that was searched.
#[derive(Debug, Clone, PartialEq)]
struct MDLink<'a> {
    /// Offset in the searched string where the whole `[text](url)` span starts.
    start_index: usize,
    /// Offset in the searched string just past the end of the span.
    end_index: usize,
    /// The part between the square brackets.
    text: &'a str,
    /// The part between the parentheses.
    url: &'a str,
}
278
+
279
+ impl < ' a > MDLink < ' a > {
280
+ #[ allow( dead_code) ]
281
+ fn from_capture ( cap : Captures < ' a > ) -> Option < MDLink < ' a > > {
282
+ let md_tuple = match ( cap. get ( 0 ) , cap. get ( 1 ) , cap. get ( 2 ) ) {
283
+ ( _, Some ( text_str) , Some ( url_str) )
284
+ if ( url_str. as_str ( ) . starts_with ( "https://" )
285
+ || url_str. as_str ( ) . starts_with ( "http://" ) ) =>
286
+ {
287
+ Some ( ( text_str. as_str ( ) , url_str. as_str ( ) ) )
288
+ }
289
+ _ => None ,
290
+ } ;
291
+
292
+ md_tuple. and_then ( |( text, url) | {
293
+ cap. get ( 0 ) . map ( |mat| MDLink {
294
+ start_index : mat. start ( ) ,
295
+ end_index : mat. end ( ) ,
296
+ text,
297
+ url,
298
+ } )
299
+ } )
300
+ }
301
+
302
+ #[ allow( dead_code) ]
303
+ fn render ( & self ) -> anyhow:: Result < String > {
304
+ let mut handlebars = Handlebars :: new ( ) ;
305
+
306
+ // register template
307
+ handlebars
308
+ . register_template_string ( "md_link_expansion" , MDLINK_TEMPLATE )
309
+ . unwrap ( ) ;
310
+
311
+ // create data for rendering handlebar
312
+ let mut data = Map :: new ( ) ;
313
+ data. insert ( "text" . to_string ( ) , to_json ( self . text ) ) ;
314
+ data. insert ( "url" . to_string ( ) , to_json ( self . url ) ) ;
315
+
316
+ // render
317
+ let html_string = handlebars. render ( "md_link_expansion" , & data) ?;
318
+
319
+ Ok ( html_string)
320
+ }
321
+ }
322
+
323
+ struct MDLinkIter < ' a > ( CaptureMatches < ' a , ' a > ) ;
324
+
325
+ impl < ' a > Iterator for MDLinkIter < ' a > {
326
+ type Item = MDLink < ' a > ;
327
+ fn next ( & mut self ) -> Option < MDLink < ' a > > {
328
+ for cap in & mut self . 0 {
329
+ if let Some ( inc) = MDLink :: from_capture ( cap) {
330
+ return Some ( inc) ;
331
+ }
332
+ }
333
+ None
334
+ }
335
+ }
336
+
337
+ fn find_md_links ( contents : & str ) -> MDLinkIter < ' _ > {
338
+ static RE : Lazy < Regex > = Lazy :: new ( || {
339
+ Regex :: new (
340
+ r"(?x)
341
+ \[([^\]]*(?:\\.[^\]]*)*)\] # link text in square brackets
342
+ \(([^)]*(?:\\.[^)]*)*)\) # link URL in parentheses
343
+ " ,
344
+ )
345
+ . unwrap ( )
346
+ } ) ;
347
+
348
+ MDLinkIter ( RE . captures_iter ( contents) )
349
+ }
350
+
233
351
#[ cfg( test) ]
234
352
mod tests {
235
353
use super :: * ;
@@ -238,14 +356,15 @@ mod tests {
238
356
239
357
#[ fixture]
240
358
fn simple_book_content ( ) -> String {
241
- "{{ #aipr_header }} {{ #aipr_header colab=nlp/lora.ipynb }} Some random text with and more text ..."
359
+ "{{ #aipr_header }} {{ #aipr_header colab=nlp/lora.ipynb }} Some random [ text with](https://fake.io) and more text ..."
242
360
. to_string ( )
243
361
}
244
362
245
363
#[ rstest]
246
364
fn test_find_links_no_author_links ( ) -> Result < ( ) > {
247
365
let s = "Some random text without link..." ;
248
366
assert ! ( find_aipr_links( s) . collect:: <Vec <_>>( ) == vec![ ] ) ;
367
+ assert ! ( find_md_links( s) . collect:: <Vec <_>>( ) == vec![ ] ) ;
249
368
Ok ( ( ) )
250
369
}
251
370
@@ -259,8 +378,9 @@ mod tests {
259
378
260
379
#[ rstest]
261
380
fn test_find_links_unknown_link_type ( ) -> Result < ( ) > {
262
- let s = "Some random text with {{#my_author ar.rs}} and {{#auth}} {{baz}} {{#bar}}..." ;
381
+ let s = "Some random \\ [ text with\\ ](test) {{#my_author ar.rs}} and {{#auth}} {{baz}} {{#bar}}..." ;
263
382
assert ! ( find_aipr_links( s) . collect:: <Vec <_>>( ) == vec![ ] ) ;
383
+ assert ! ( find_md_links( s) . collect:: <Vec <_>>( ) == vec![ ] ) ;
264
384
Ok ( ( ) )
265
385
}
266
386
@@ -405,4 +525,56 @@ mod tests {
405
525
406
526
Ok ( ( ) )
407
527
}
528
+
529
+ #[ rstest]
530
+ fn test_finds_md_link ( simple_book_content : String ) -> Result < ( ) > {
531
+ let res = find_md_links ( & simple_book_content[ ..] ) . collect :: < Vec < _ > > ( ) ;
532
+ println ! ( "\n OUTPUT: {res:?}\n " ) ;
533
+
534
+ assert_eq ! (
535
+ res,
536
+ vec![ MDLink {
537
+ start_index: 71 ,
538
+ end_index: 99 ,
539
+ text: "text with" ,
540
+ url: "https://fake.io"
541
+ } ]
542
+ ) ;
543
+
544
+ Ok ( ( ) )
545
+ }
546
+
547
+ #[ rstest]
548
+ fn test_md_link_render ( ) -> Result < ( ) > {
549
+ let link = MDLink {
550
+ start_index : 19 ,
551
+ end_index : 58 ,
552
+ text : "some text" ,
553
+ url : "https://fake.io" ,
554
+ } ;
555
+
556
+ let html_string = link. render ( ) ?;
557
+ let expected = "<a href=\" https://fake.io\" target=\" _blank\" \
558
+ rel=\" noopener noreferrer\" >some text</a>\n ";
559
+
560
+ assert_eq ! ( html_string, expected) ;
561
+
562
+ Ok ( ( ) )
563
+ }
564
+
565
+ #[ rstest]
566
+ fn test_replace_all_md_links ( ) -> Result < ( ) > {
567
+ let content = "This is [good link](https://good.io), \
568
+ whereas , and \
569
+ neither is \\ [this\\ ](http://not-covered.io).";
570
+
571
+ let new_content = replace_all_md_links ( content) ;
572
+ let expected = "This is <a href=\" https://good.io\" target=\" _blank\" \
573
+ rel=\" noopener noreferrer\" >good link</a>\n , whereas , \
574
+ and neither is \\ [this\\ ](http://not-covered.io).";
575
+
576
+ assert_eq ! ( new_content, expected) ;
577
+
578
+ Ok ( ( ) )
579
+ }
408
580
}
0 commit comments