@@ -8,6 +8,17 @@ use pyo3::exceptions::PyValueError;
88
99extern "C" { fn tree_sitter_python ( ) -> tree_sitter:: Language ; }
1010
/// Rewrites capture names inside a tree-sitter query string.
///
/// Each rule is a plain substring substitution applied to the whole query text
/// (for example `@text.title` becomes `@markup.heading`). Rules run one after
/// another in table order, each one operating on the output of the previous rule.
///
/// Returns a new `String`; `src` itself is left untouched.
fn remap_queries(src: &str) -> String {
    // (capture name to look for, capture name to emit instead)
    const RENAMES: &[(&str, &str)] = &[
        ("@text.title", "@markup.heading"),
        ("@text.literal", "@markup.raw"),
        ("@text.emphasis", "@markup.italic"),
        ("@text.strong", "@markup.bold"),
        ("@text.uri", "@markup.link.url"),
        ("@text.reference", "@markup.link.label"),
        ("@string.escape", "@constant.character.escape"),
        ("@none", "@comment"),
    ];

    RENAMES
        .iter()
        .fold(src.to_owned(), |query, &(old, new)| query.replace(old, new))
}
21+
1122static PY_CONFIG : LazyLock < HighlightConfiguration > = LazyLock :: new ( || {
1223 let mut c = HighlightConfiguration :: new (
1324 unsafe { tree_sitter_python ( ) } , "python" ,
@@ -19,6 +30,28 @@ static PY_CONFIG: LazyLock<HighlightConfiguration> = LazyLock::new(|| {
1930 c
2031} ) ;
2132
33+ static MD_BLOCK_CONFIG : LazyLock < HighlightConfiguration > = LazyLock :: new ( || {
34+ let hl = remap_queries ( tree_sitter_md:: HIGHLIGHT_QUERY_BLOCK ) ;
35+ let inj = tree_sitter_md:: INJECTION_QUERY_BLOCK ;
36+ let mut c = HighlightConfiguration :: new (
37+ tree_sitter_md:: LANGUAGE . into ( ) , "markdown" ,
38+ & hl, inj, "" ,
39+ ) . expect ( "Failed to load Markdown block config" ) ;
40+ c. configure ( HIGHLIGHT_NAMES ) ;
41+ c
42+ } ) ;
43+
44+ static MD_INLINE_CONFIG : LazyLock < HighlightConfiguration > = LazyLock :: new ( || {
45+ let hl = remap_queries ( tree_sitter_md:: HIGHLIGHT_QUERY_INLINE ) ;
46+ let inj = tree_sitter_md:: INJECTION_QUERY_INLINE ;
47+ let mut c = HighlightConfiguration :: new (
48+ tree_sitter_md:: INLINE_LANGUAGE . into ( ) , "markdown_inline" ,
49+ & hl, inj, "" ,
50+ ) . expect ( "Failed to load Markdown inline config" ) ;
51+ c. configure ( HIGHLIGHT_NAMES ) ;
52+ c
53+ } ) ;
54+
2255fn parse_lang ( lang : & str ) -> PyResult < Language > {
2356 Language :: from_token ( lang)
2457 . ok_or_else ( || PyValueError :: new_err ( format ! ( "Unknown language: {lang}" ) ) )
@@ -52,6 +85,44 @@ fn tokenize(code: &str, lang: &str) -> PyResult<Vec<(usize, usize, String)>> {
5285 } ) . map_err ( |e| PyValueError :: new_err ( format ! ( "Highlight error: {e}" ) ) ) ?;
5386 return run_highlights ( events) ;
5487 }
88+ if lang == "markdown" || lang == "md" {
89+ let mut h = TSHighlighter :: new ( ) ;
90+ let block_events = h. highlight ( & MD_BLOCK_CONFIG , code. as_bytes ( ) , None , |_| None )
91+ . map_err ( |e| PyValueError :: new_err ( format ! ( "Highlight error: {e}" ) ) ) ?;
92+ let mut toks = run_highlights ( block_events) ?;
93+ let mut parser = tree_sitter:: Parser :: new ( ) ;
94+ parser. set_language ( & tree_sitter_md:: LANGUAGE . into ( ) ) . ok ( ) ;
95+ if let Some ( tree) = parser. parse ( code, None ) {
96+ let mut cursor = tree. walk ( ) ;
97+ loop {
98+ let node = cursor. node ( ) ;
99+ if node. kind ( ) == "inline" {
100+ let start = node. start_byte ( ) ;
101+ let end = node. end_byte ( ) ;
102+ if start < end {
103+ let slice = & code. as_bytes ( ) [ start..end] ;
104+ let mut h2 = TSHighlighter :: new ( ) ;
105+ let events = h2. highlight ( & MD_INLINE_CONFIG , slice, None , |t| {
106+ Language :: from_token ( t) . map ( |l| l. config ( ) )
107+ } ) ;
108+ if let Ok ( evts) = events {
109+ let collected: Vec < _ > = evts. collect ( ) ;
110+ if let Ok ( inline_toks) = run_highlights ( collected. into_iter ( ) ) {
111+ for ( s, e, kind) in inline_toks { toks. push ( ( start + s, start + e, kind) ) ; }
112+ }
113+ }
114+ }
115+ }
116+ if cursor. goto_first_child ( ) { continue ; }
117+ while !cursor. goto_next_sibling ( ) {
118+ if !cursor. goto_parent ( ) { break ; }
119+ }
120+ if cursor. node ( ) == tree. root_node ( ) { break ; }
121+ }
122+ }
123+ toks. sort_by_key ( |( s, _, _) | * s) ;
124+ return Ok ( toks) ;
125+ }
55126 let language = parse_lang ( lang) ?;
56127 let mut h = Highlighter :: new ( ) ;
57128 let source = code. to_string ( ) ;
@@ -141,18 +212,18 @@ fn highlight_spans(code: &str, lang: &str, class_prefix: Option<&str>) -> PyResu
141212
142213#[ pyfunction]
143214fn languages ( ) -> Vec < & ' static str > {
144- vec ! [
215+ let all = vec ! [
145216 "ada" , "asm" , "astro" , "awk" , "bash" , "bibtex" , "bicep" , "blueprint" , "c" , "capnp" ,
146217 "clojure" , "c_sharp" , "commonlisp" , "cpp" , "css" , "cue" , "d" , "dart" , "diff" ,
147218 "dockerfile" , "eex" , "elisp" , "elixir" , "elm" , "erlang" , "forth" , "fortran" , "gdscript" ,
148219 "gleam" , "glsl" , "go" , "haskell" , "hcl" , "heex" , "html" , "iex" , "ini" , "java" ,
149220 "javascript" , "json" , "jsx" , "kotlin" , "latex" , "llvm" , "lua" , "make" , "markdown" , "md" ,
150- "matlab" , "meson" ,
151- "nim " , "nix " , "objc " , "ocaml " , "openscad " , "pascal " , "php " , "plaintext " , "proto " ,
152- "python " , "r " , "racket " , "regex " , "ruby " , "rust " , "scala " , "scheme " , "scss " , "sql " ,
153- "svelte " , "swift " , "toml " , "typescript " , "tsx" , "vim" , "wast" , "wat" , "x86asm" , "wgsl ",
154- "yaml" , "zig" ,
155- ]
221+ "matlab" , "meson" , "nim" , "nix" , "objc" , "ocaml" , "openscad" , "pascal" , "php" ,
222+ "plaintext " , "proto " , "python " , "r " , "racket " , "regex " , "ruby " , "rust " , "scala " ,
223+ "scheme " , "scss " , "sql " , "svelte " , "swift " , "toml " , "typescript " , "tsx " , "vim " , "wast " ,
224+ "wat " , "x86asm " , "wgsl " , "yaml " , "zig " ,
225+ ] ;
226+ all . into_iter ( ) . filter ( |t| matches ! ( * t , "python" | "py" | "markdown" | "md" ) || Language :: from_token ( t ) . is_some ( ) ) . collect ( )
156227}
157228
158229fn lookup_vendored ( name : & str ) -> PyResult < & ' static str > {
@@ -334,4 +405,4 @@ fn _core(m: &Bound<'_, PyModule>) -> PyResult<()> {
334405 m. add_function ( wrap_pyfunction ! ( theme_css, m) ?) ?;
335406 m. add_function ( wrap_pyfunction ! ( themes, m) ?) ?;
336407 Ok ( ( ) )
337- }
408+ }
0 commit comments