-
-
Notifications
You must be signed in to change notification settings - Fork 170
Adding non-breakable spaces lua filter #119
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: master
Are you sure you want to change the base?
Changes from 14 commits
1c951b5
ed6a50a
dd2f12e
36c91bf
7df6f7c
64bf968
e167899
5e829a0
6a62c16
6a8bc4f
d0bc724
feec602
0cca88c
ea8e13a
f78a1fa
4b5d58a
d5fce8b
80a6900
25a7564
a4c6e81
5f13001
f060207
c02aa69
22c7c82
fa6a2b8
7008e81
a675d44
5ebf029
bc67eb1
7de7a14
76b00e7
11794ff
7536ba9
dad0797
8f7501f
ce063f3
d407e90
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,27 @@ | ||
# Non-breakable space filter | ||
|
||
This filter replaces regular spaces with non-breakable spaces according to | ||
predefined conditions. Currently, this filter replaces regular spaces with | ||
non-breakable ones after one-letter words (prepositions and conjunctions): | ||
'a', 'i', 'k', 'o', 's', 'u', 'v', 'z', and their uppercase variants. It also | ||
inserts non-breakable spaces in front of en-dashes and in front of numbers. | ||
Some extra effort is taken in detecting these patterns in *not-fully* parsed | ||
strings (for example, if this filter is used after some macro replacing | ||
filter). | ||
|
||
In this regard, the filter works similarly to the TeX `vlna` preprocessor | ||
or the LuaTeX `luavlna` package. | ||
|
||
The default settings conform to Czech typography rules, but they can easily | ||
be customized by the user in the filter file `nonbreakablespace.lua` | ||
by changing the contents of the `prefixes` or `dashes` tables. | ||
|
||
Currently supported formats are: | ||
|
||
* LaTeX and ConTeXt | ||
* Open Office Document | ||
* MS Word | ||
* HTML | ||
|
||
**NOTE**: Using this filter increases strain on line-breaking patterns. Whenever | ||
possible, consider allowing hyphenation. |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,12 @@ | ||
<h1 id="tests">Tests</h1> | ||
<h2 id="basic-test">Basic test</h2> | ||
<p>a test i test k test o test s test u test v test z test A test I test K test O test S test U test V test Z test – test – test</p> | ||
<h2 id="test-with-numbers">Test with numbers</h2> | ||
<p>Test 19 test “19” test</p> | ||
<h2 id="test-of-double-prefixes.">Test of double prefixes.</h2> | ||
<p>A i test, i v test, a k test, a v test.</p> | ||
<h2 id="test-of-block-code">Test of block code</h2> | ||
<pre><code>a = 5 | ||
k = "test"</code></pre> | ||
<h2 id="test-of-inline-code">Test of inline code</h2> | ||
<p>Test <code>a = 5</code> test</p> |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,12 @@ | ||
<h1 id="tests">Tests</h1> | ||
<h2 id="basic-test">Basic test</h2> | ||
<p>a test i test A test I test the test The test – test – test</p> | ||
<h2 id="test-with-numbers">Test with numbers</h2> | ||
<p>Test 19 test “19” test</p> | ||
<h2 id="test-of-double-prefixes.">Test of double prefixes.</h2> | ||
<p>A i test, i v test, a k test, a v test.</p> | ||
<h2 id="test-of-block-code">Test of block code</h2> | ||
<pre><code>a = 5 | ||
k = "test"</code></pre> | ||
<h2 id="test-of-inline-code">Test of inline code</h2> | ||
<p>Test <code>a = 5</code> test</p> |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,6 @@ | ||
DIFF ?= diff --strip-trailing-cr -u | ||
|
||
test: | ||
@pandoc --lua-filter=pandocVlna.lua sampleCZ.md | $(DIFF) expectedCZ.html - | ||
@pandoc --lua-filter=pandocVlna.lua sampleEN.md | $(DIFF) expectedEN.html - | ||
.PHONY: test |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,202 @@ | ||
local utils = require 'pandoc.utils' | ||
local stringify = utils.stringify | ||
|
||
--[[
Lists of short words after which a non-breakable space is inserted instead of
a regular one. Verbose on purpose, so they can be changed per user
requirements.

`prefixes` is the list the filter actually consults; `Meta` re-points it at
one of the language-specific lists according to the `lang` metadata value.
--]]

-- English: one-letter words and articles.
local prefixesEN = {
  'I',
  'a',
  'A',
  'the',
  'The'
}

-- Czech: one-letter prepositions and conjunctions, lower and upper case.
local prefixesCZ = {
  'a',
  'i',
  'k',
  'o',
  's',
  'u',
  'v',
  'z',
  'A',
  'I',
  'K',
  'O',
  'S',
  'U',
  'V',
  'Z'
}

-- Active list. Declared exactly once (a second `local prefixes` would shadow
-- this one) and initialised to English, the same fallback `Meta` uses.
local prefixes = prefixesEN
|
||
--[[
Set `prefixes` according to the `lang` metadata value.

* `cs` (or a regional variant such as `cs-CZ`) selects `prefixesCZ`.
* Any other value, or a missing `lang`, selects `prefixesEN`.

The function deliberately returns nothing: a value returned from a `Meta`
filter function replaces the document metadata, and this function only wants
to read it.
--]]
function Meta(meta)
  local lang = ''
  if meta.lang then
    lang = stringify(meta.lang)
  end

  if lang == 'cs' or lang:match('^cs%-') then
    -- Keep the list already in place if no dedicated Czech list is defined,
    -- rather than setting `prefixes` to nil and crashing later in `ipairs`.
    prefixes = prefixesCZ or prefixes
  else
    prefixes = prefixesEN -- default to English prefixes
  end
end
|
||
--[[
Strings treated as a long dash. Some languages (Czech among them) require a
non-breakable space *before* a long dash, so a space preceding one of these
strings is replaced.
--]]
local dashes = { '--', '–' }
|
||
--[[
Table of replacement strings for a non-breakable space, keyed by output
format.

The HTML value is spelled as the `&nbsp;` entity on purpose: a literal
non-breakable space character is indistinguishable from an ordinary space in
most editors and is easily lost when the file is copied around.
--]]
local nonbreakablespaces = {
  html = '&nbsp;',
  latex = '~',
  context = '~'
}
|
||
--[[
Tells whether `my_string` is exactly one of the entries of the `prefixes`
table, i.e. a word after which a non-breakable space should be inserted.

The scan stops at the first hit:

* returns `true` as soon as an entry equal to `my_string` is found;
* returns `false` once the whole table has been walked without a hit.
--]]
function find_one_letter_prefix(my_string)
  for position = 1, #prefixes do
    if prefixes[position] == my_string then
      return true
    end
  end
  return false
end
|
||
--[[
Tells whether `my_dash` is exactly one of the entries of the `dashes` table,
i.e. a string before which a non-breakable space should be inserted.

The scan stops at the first hit:

* returns `true` as soon as an entry equal to `my_dash` is found;
* returns `false` once the whole table has been walked without a hit.
--]]
function find_dashes(my_dash)
  for _, candidate in ipairs(dashes) do
    if candidate == my_dash then return true end
  end
  return false
end
|
||
--[[
Builds the element that replaces a Space, according to the output format.

* Any HTML flavour (`html`, `html4`, `html5`, ...) gets a raw HTML inline.
* LaTeX and ConTeXt get a raw TeX inline, each using its own entry of the
  `nonbreakablespaces` table so the two can be customised independently.
* Every other format (and a missing format) falls back to a `Str` holding the
  Unicode non-breakable space character.

@param format  name of the output format (normally the global `FORMAT`)
@return        a pandoc inline element
--]]
function insert_nonbreakable_space(format)
  -- Tolerate a missing format instead of failing on `nil:match`.
  format = format or ''

  if format:match('html') then
    return pandoc.RawInline('html', nonbreakablespaces.html)
  elseif format:match('latex') then
    return pandoc.RawInline('tex', nonbreakablespaces.latex)
  elseif format:match('context') then
    return pandoc.RawInline('tex', nonbreakablespaces.context)
  end

  -- Fallback: the non-breakable space Unicode character itself.
  return pandoc.Str('\u{a0}')
end
|
||
--[[
Core filter function. Walks one list of inline elements and:

* replaces a Space by the format-specific non-breakable space when
    - the element before it is a Str listed in `prefixes`, or
    - the element after it is a Str listed in `dashes`, or
    - the element after it is a Str containing a digit;
* inside a Str that still contains whitespace (not fully parsed text, e.g.
  produced by an earlier filter), replaces the whitespace following a prefix
  by the non-breakable space character.

Returns the same list, modified in place.
--]]

-- Non-breakable space as a character. Used inside Str text, where only a
-- string (not an inline element) can be inserted.
local NBSP_CHAR = '\u{a0}'

-- Replaces the whitespace after every prefix word inside one string.
-- A word may carry a leading „ or a trailing “ (as added by a quotation
-- filter); both are ignored when comparing against the prefix list.
local function protect_prefixes_in_text(text)
  local protected = text:gsub('(%S+)(%s+)', function(word, _gap)
    local bare = word:gsub('^„', '')
    bare = bare:gsub('“$', '')
    if find_one_letter_prefix(bare) then
      return word .. NBSP_CHAR
    end
    return nil -- keep the original word and whitespace untouched
  end)
  return protected
end

function Inlines (inlines)

  -- element that takes the place of a Space
  local insert = insert_nonbreakable_space(FORMAT)

  for i = 1, #inlines do
    local current = inlines[i]

    if current.t == 'Space' then
      -- Neighbours are nil at either end of the list; guard before use.
      local previous, following = inlines[i - 1], inlines[i + 1]
      local previous_text = previous and previous.t == 'Str' and previous.text
      local following_text = following and following.t == 'Str' and following.text

      local after_prefix = previous_text
        and find_one_letter_prefix(previous_text)
      -- A Str "containing a digit" covers plain and quoted numbers alike.
      local before_dash_or_number = following_text
        and (find_dashes(following_text) or following_text:find('%d'))

      if after_prefix or before_dash_or_number then
        inlines[i] = insert
      end

    elseif current.t == 'Str' and current.text:find('%s') then
      current.text = protect_prefixes_in_text(current.text)
    end
  end

  return inlines
end
|
||
-- Return the filter as an explicit list of two passes, so that `Meta` runs
-- over the document before `Inlines` does.
local passes = {
  { Meta = Meta },
  { Inlines = Inlines },
}
return passes
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,28 @@ | ||
--- | ||
lang: cs | ||
--- | ||
|
||
# Tests | ||
|
||
## Basic test | ||
|
||
a test i test k test o test s test u test v test z test A test I test K test O test S test U test V test Z test -- test – test | ||
|
||
## Test with numbers | ||
|
||
Test 19 test "19" test | ||
|
||
## Test of double prefixes. | ||
|
||
A i test, i v test, a k test, a v test. | ||
|
||
## Test of block code | ||
|
||
``` | ||
a = 5 | ||
k = "test" | ||
``` | ||
|
||
## Test of inline code | ||
|
||
Test `a = 5` test |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,28 @@ | ||
--- | ||
lang: en | ||
--- | ||
|
||
# Tests | ||
|
||
## Basic test | ||
|
||
a test i test A test I test the test The test -- test – test | ||
|
||
## Test with numbers | ||
|
||
Test 19 test "19" test | ||
|
||
## Test of double prefixes. | ||
|
||
A i test, i v test, a k test, a v test. | ||
|
||
## Test of block code | ||
|
||
``` | ||
a = 5 | ||
k = "test" | ||
``` | ||
|
||
## Test of inline code | ||
|
||
Test `a = 5` test |
Uh oh!
There was an error while loading. Please reload this page.