|
| 1 | +#! /usr/bin/env python |
| 2 | +import gi |
| 3 | +try: |
| 4 | + gi.require_version ('GMime', '3.0') |
| 5 | +except ValueError: |
| 6 | + gi.require_version ('GMime', '2.6') |
| 7 | + |
| 8 | +from gi.repository import GMime |
| 9 | + |
| 10 | +from pygments import highlight |
| 11 | +from pygments.lexers import guess_lexer |
| 12 | +from pygments.formatters import HtmlFormatter |
| 13 | + |
class SyntaxHighlight:
    """Syntax highlight patches and tagged code segments of a message part.

    Highlighting is done with Pygments and emitted as HTML with inline
    styles (``noclasses=True``).
    """

    # (start tag, end tag) pairs that delimit a code segment.
    _CODE_TAGS = (('```', '```'), ('<code>', '</code>'))

    def high(self, segment):
        """Return *segment* as highlighted HTML, guessing its language."""
        lexer = guess_lexer(segment)

        print("filtering: guessed language:", str(lexer))

        return highlight(segment, lexer, HtmlFormatter(noclasses=True))

    @staticmethod
    def _find_tag(line, tag, start=0):
        """Locate *tag* in an HTML line, literally or HTML-escaped.

        A tag such as ``<code>`` typed in a plain text part may show up as
        ``&lt;code&gt;`` in its HTML rendering, so both spellings are tried.

        Returns:
            ``(index, length)`` of the match, or ``(-1, 0)`` if absent.
        """
        # Local import: the name `html` is used for the part body elsewhere.
        from html import escape

        for candidate in (tag, escape(tag)):
            index = line.find(candidate, start)
            if index != -1:
                return index, len(candidate)
        return -1, 0

    def _highlight_html(self, html):
        """Highlight every tagged code segment of an HTML part."""
        for tag, antitag in self._CODE_TAGS:
            pos = 0
            while True:
                start = html.find(tag, pos)
                if start == -1:
                    break

                end = html.find(antitag, start + len(tag))
                if end == -1:
                    # Unterminated segment: leave the rest untouched.
                    break

                # NOTE(review): the segment is passed on as found in the
                # HTML, so entities such as &lt; are presumably highlighted
                # verbatim -- confirm whether they should be unescaped first.
                segment = self.high(html[start + len(tag):end])
                html = html[:start] + segment + html[end + len(antitag):]

                # Continue after the inserted markup so it is never rescanned
                # for the same tag.
                pos = start + len(segment)

        return html

    def _highlight_plain(self, text, html):
        """Highlight tagged code segments of a plain text part.

        The GMime filter has created the HTML line-for-line, so a code tag
        found on a text line is looked up on the same line of the HTML part.
        Segments whose tags cannot be located in the HTML are left untouched.
        """
        text_lines = text.split('\n')
        html_lines = html.split('\n')

        no = 0
        # Offset between HTML and text line numbers after a highlighted
        # segment changed the number of HTML lines.
        offset = 0
        while no < len(text_lines):
            line = text_lines[no]
            for tag, antitag in self._CODE_TAGS:
                it = line.find(tag)
                if it == -1:
                    continue

                code_from = it + len(tag)
                for eno, el in enumerate(text_lines[no:]):
                    # On the opening line the end tag must come after the
                    # start tag (they may be the same string, e.g. ```).
                    iet = el.find(antitag, code_from if eno == 0 else 0)
                    if iet == -1:
                        continue

                    # Found the end tag; map both ends onto the HTML lines.
                    first = no + offset
                    last = first + eno
                    if last >= len(html_lines):
                        # HTML is not line-for-line with the text.
                        break

                    ih, tag_len = self._find_tag(html_lines[first], tag)
                    if ih == -1:
                        break
                    ieh, antitag_len = self._find_tag(
                        html_lines[last], antitag,
                        ih + tag_len if eno == 0 else 0)
                    if ieh == -1:
                        break

                    segment = text_lines[no:no + eno + 1]
                    # Trim the end first: on a single-line segment both cuts
                    # hit the same string and `iet` refers to the full line.
                    segment[-1] = segment[-1][:iet]
                    segment[0] = segment[0][code_from:]

                    html_segment = self.high('\n'.join(segment)).split('\n')

                    # Keep what precedes the start tag and what follows the
                    # end tag in the original HTML.
                    html_segment[0] = html_lines[first][:ih] + html_segment[0]
                    html_segment[-1] += html_lines[last][ieh + antitag_len:]

                    html_lines[first:last + 1] = html_segment

                    offset += len(html_segment) - len(segment)
                    no += eno
                    break

                # A start tag was handled on this line; do not try the
                # remaining tag kinds on it.
                break
            no += 1

        return '\n'.join(html_lines)

    def do_filter_part(self, text, html, mime_type, is_patch):
        """
        Filter the part and output safe HTML.

        Search for code tags (``` or <code>), or determine if part is a patch.
        Syntax highlight the relevant parts and use the html part for the rest.

        Args:
            text: plain text content of the part.
            html: HTML rendering of the part.
            mime_type: MIME type of the part; only ``text/html`` and
                ``text/plain`` are searched for code tags.
            is_patch: if true, the whole text is highlighted as a diff.

        Returns:
            The resulting HTML; *html* unchanged for other MIME types.
        """
        if is_patch:
            from pygments.lexers.diff import DiffLexer
            return highlight(text, DiffLexer(), HtmlFormatter(noclasses=True))

        if mime_type == 'text/html':
            return self._highlight_html(html)

        if mime_type == 'text/plain':
            return self._highlight_plain(text, html)

        return html
| 109 | + |
| 110 | + |
0 commit comments