Skip to content

Commit 7506cb6

Browse files
committed
lua - wrap lpeg parsing around unicode protection calls
1 parent 82f0bb0 commit 7506cb6

File tree

4 files changed

+68
-13
lines changed

4 files changed

+68
-13
lines changed

src/resources/filters/customnodes/shortcodes.lua

Lines changed: 3 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -225,8 +225,6 @@ function shortcodes_filter()
225225
end
226226
local result = callShortcodeHandler(handler, shortcode_struct)
227227
return pandoc.utils.stringify(result)
228-
229-
-- return "<<<" .. table.concat(lst, " ") .. ">>>"
230228
end,
231229
})
232230
local filter
@@ -265,7 +263,7 @@ function shortcodes_filter()
265263
return
266264
end
267265

268-
el.text = code_shortcode:match(el.text)
266+
el.text = shortcode_lpeg.wrap_lpeg_match(code_shortcode, el.text)
269267
return el
270268
end
271269

@@ -284,11 +282,11 @@ function shortcodes_filter()
284282
Shortcode = inline_handler,
285283
RawInline = code_handler,
286284
Image = function(el)
287-
el.src = code_shortcode:match(el.src)
285+
el.src = shortcode_lpeg.wrap_lpeg_match(code_shortcode, el.src)
288286
return el
289287
end,
290288
Link = function(el)
291-
el.target = code_shortcode:match(el.target)
289+
el.target = shortcode_lpeg.wrap_lpeg_match(code_shortcode, el.target)
292290
return el
293291
end,
294292
Span = function(el)

src/resources/pandoc/datadir/lpegshortcode.lua

Lines changed: 51 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -280,8 +280,57 @@ if os.getenv("LUA_TESTING") ~= nil then
280280
print("Tests passed")
281281
end
282282

283+
--- Replace every non-ASCII code point in `txt` with an ASCII-only escape.
-- Each code point above 127 is rewritten as a fixed UUID marker followed by
-- an HTML-style hexadecimal character reference (`&#x..;`). The UUID lets us
-- restore the original text later without worrying about collisions with
-- user text that happens to use the same `&#x..;` escape syntax.
-- @tparam string txt UTF-8 text (utf8.codes raises an error on invalid UTF-8)
-- @treturn string the escaped text, containing only ASCII bytes
local function escape_unicode(txt)
  -- fast path: text without any byte above 127 is pure ASCII and needs no
  -- escaping, so avoid rebuilding it one character at a time
  if not string.find(txt, "[\128-\255]") then
    return txt
  end
  local result = {}
  for _, c in utf8.codes(txt) do
    if c > 127 then
      result[#result + 1] = string.format("cf5733e5-0370-4aae-8689-61bad1dd9ec0&#x%x;", c)
    else
      -- ASCII code points are a single byte
      result[#result + 1] = string.char(c)
    end
  end
  return table.concat(result)
end
298+
299+
--- Replace escaped code points with their unescaped (UTF-8) version.
-- Only `&#x..;` references that directly follow the UUID marker are
-- restored; plain `&#x..;` references in the text are left untouched.
-- @tparam string txt text that may contain UUID-tagged hexadecimal references
-- @treturn string the text with those references decoded to UTF-8
local function unescape_unicode(txt)
  -- the surrounding parentheses drop gsub's second result (the substitution
  -- count) so that callers always receive exactly one value
  return (txt:gsub("cf5733e5%-0370%-4aae%-8689%-61bad1dd9ec0&#x([0-9a-fA-F]+);", function (c)
    return utf8.char(tonumber(c, 16))
  end))
end
305+
306+
--- Match a pattern against text that may contain multi-byte code points.
-- The text is escaped with escape_unicode before matching, and a string
-- result is passed through unescape_unicode afterwards.
-- @param pattern object with a `match` method; invoked as `pattern:match(txt)`
-- @tparam string txt the text to match
-- @return the first value returned by `pattern:match`: unescaped when it is a
--   string, returned unchanged otherwise (nil when the pattern did not match)
local function wrap_lpeg_match(pattern, txt)
  local result = pattern:match(escape_unicode(txt))
  if type(result) ~= "string" then
    -- nil (no match) and non-string results have nothing to unescape;
    -- calling a string method on them would raise an error
    return result
  end
  -- parentheses keep this to a single return value regardless of how many
  -- values unescape_unicode hands back
  return (unescape_unicode(result))
end
315+
283316
return {
284-
md_shortcode = md_shortcode,
317+
lpegs = {
318+
md_shortcode = md_shortcode,
319+
unshortcode = unshortcode -- for undoing shortcodes in non-markdown contexts
320+
},
321+
322+
parse_md_shortcode = function(txt)
323+
return wrap_lpeg_match(md_shortcode, txt)
324+
end,
325+
326+
-- use this to undo shortcode parsing in non-markdown contexts
327+
unparse_md_shortcode = function(txt)
328+
return wrap_lpeg_match(unshortcode, txt)
329+
end,
330+
285331
make_shortcode_parser = make_shortcode_parser,
286-
unshortcode = unshortcode -- for undoing shortcodes in non-markdown contexts
332+
333+
-- use this to safely call an lpeg pattern with a string
334+
-- that contains multi-byte code points
335+
wrap_lpeg_match = wrap_lpeg_match
287336
}

src/resources/pandoc/datadir/readqmd.lua

Lines changed: 5 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -126,8 +126,7 @@ end
126126

127127
local function readqmd(txt, opts)
128128
txt, tags = escape_invalid_tags(txt)
129-
txt = md_shortcode.md_shortcode:match(txt)
130-
129+
txt = md_shortcode.parse_md_shortcode(txt)
131130
local flavor = {
132131
format = "markdown",
133132
extensions = {},
@@ -150,7 +149,7 @@ local function readqmd(txt, opts)
150149

151150
local unshortcode_text = function (c)
152151
if c.text:match("data%-is%-shortcode%=%\"1%\"") then
153-
c.text = md_shortcode.unshortcode:match(c.text)
152+
c.text = md_shortcode.unparse_md_shortcode(c.text)
154153
end
155154
return c
156155
end
@@ -159,7 +158,7 @@ local function readqmd(txt, opts)
159158
CodeBlock = function (cb)
160159
cb.classes = cb.classes:map(restore_invalid_tags)
161160
if cb.text:match("data%-is%-shortcode%=%\"1%\"") then
162-
cb.text = md_shortcode.unshortcode:match(cb.text)
161+
cb.text = md_shortcode.unparse_md_shortcode(cb.text)
163162
end
164163
cb.text = unescape_invalid_tags(cb.text, tags)
165164
return cb
@@ -169,13 +168,13 @@ local function readqmd(txt, opts)
169168
RawBlock = unshortcode_text,
170169
Link = function (l)
171170
if l.target:match("data%-is%-shortcode%=%%221%%22") then
172-
l.target = md_shortcode.unshortcode:match(urldecode(l.target))
171+
l.target = md_shortcode.unparse_md_shortcode(urldecode(l.target))
173172
return l
174173
end
175174
end,
176175
Image = function (i)
177176
if i.src:match("data%-is%-shortcode%=%%221%%22") then
178-
i.src = md_shortcode.unshortcode:match(urldecode(i.src))
177+
i.src = md_shortcode.unparse_md_shortcode(urldecode(i.src))
179178
return i
180179
end
181180
end,
Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
---
2+
title: issue-8485
3+
format: html
4+
classe_rugosità: value
5+
---
6+
7+
this shortcode will raise an encoding error on 1.4.549
8+
9+
{{< meta classe_rugosità >}}

0 commit comments

Comments
 (0)