Skip to content

Commit

Permalink
Merge pull request #25 from sionide21/pure-links-whitespace
Browse files (browse the repository at this point in the history)
Retain whitespace when parsing pure links
  • Loading branch information
RobertDober authored Aug 21, 2020
2 parents d9283fd + dfac8cb commit 2530202
Show file tree
Hide file tree
Showing 2 changed files with 26 additions and 10 deletions.
22 changes: 14 additions & 8 deletions lib/earmark_parser/helpers/pure_link_helpers.ex
Original file line number Diff line number Diff line change
Expand Up @@ -7,13 +7,13 @@ defmodule EarmarkParser.Helpers.PureLinkHelpers do
@pure_link_rgx ~r{\A(\s*)(\()?(https?://[[:alnum:]"'*@:+-_{\}()/.%\#]*)}u
def convert_pure_link(src) do
case Regex.run(@pure_link_rgx, src) do
[_match, spaces, "", link_text] -> reparse_link(String.length(spaces), link_text)
[_match, spaces, _, link_text] -> remove_trailing_closing_parens(String.length(spaces), link_text)
[_match, spaces, "", link_text] -> reparse_link(spaces, link_text)
[_match, spaces, _, link_text] -> remove_trailing_closing_parens(spaces, link_text)
_ -> nil
end
end

defp determine_ending_parens_by_count(leading_spaces_count, prefix, surplus_on_closing_parens) do
defp determine_ending_parens_by_count(leading_spaces, prefix, surplus_on_closing_parens) do
graphemes = String.graphemes(prefix)
open_parens_count = Enum.count(graphemes, &(&1 == "("))
close_parens_count = Enum.count(graphemes, &(&1 == ")"))
Expand All @@ -22,25 +22,31 @@ defmodule EarmarkParser.Helpers.PureLinkHelpers do
needed =
:lists.duplicate(max(0, take), ")")
|> Enum.join
{link(prefix <> needed), String.length(prefix) + leading_spaces_count + max(0,take)}
link = link(prefix <> needed)
ast =
case leading_spaces do
"" -> link
_ -> [leading_spaces, link]
end
{ast, String.length(prefix) + String.length(leading_spaces) + max(0,take)}
end

@split_at_ending_parens ~r{(.*?)(\)*)\z}
defp remove_trailing_closing_parens(leading_spaces_count, link_text) do
defp remove_trailing_closing_parens(leading_spaces, link_text) do
[_, _prefix, suffix] = Regex.run(@split_at_ending_parens, link_text)
case suffix do
"" -> {"(", leading_spaces_count + 1}
"" -> {"(", String.length(leading_spaces) + 1}
_ -> case convert_pure_link(betail(link_text, 1)) do
{link, length} -> {["(", link, ")"], length + 2}
_ -> nil
end
end
end

defp reparse_link(leading_spaces_count, link_text) do
defp reparse_link(leading_spaces, link_text) do
[_, prefix, suffix] = Regex.run(@split_at_ending_parens, link_text)
nof_closing_parens = String.length(suffix)
determine_ending_parens_by_count(leading_spaces_count, prefix, nof_closing_parens)
determine_ending_parens_by_count(leading_spaces, prefix, nof_closing_parens)
end

defp link(text), do: render_link(text, text)
Expand Down
14 changes: 12 additions & 2 deletions test/acceptance/ast/links_images/pure_links_test.exs
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ defmodule Acceptance.Ast.LinksImages.PureLinksTest do
describe "enabled pure links" do
test "two in a row" do
markdown = "https://github.com/pragdave/earmark https://github.com/RobertDober/extractly"
html = "<p><a href=\"https://github.com/pragdave/earmark\">https://github.com/pragdave/earmark</a> <a href=\"https://github.com/RobertDober/extractly\">https://github.com/RobertDober/extractly</a></p>\n"
html = "<p><a href=\"https://github.com/pragdave/earmark\">https://github.com/pragdave/earmark</a>&#x20;<a href=\"https://github.com/RobertDober/extractly\">https://github.com/RobertDober/extractly</a></p>\n"
ast = parse_html(html)
messages = []

Expand Down Expand Up @@ -54,7 +54,17 @@ defmodule Acceptance.Ast.LinksImages.PureLinksTest do

test "correct mix" do
markdown = "[https://erlang.org](https://erlang.org) https://elixir.lang"
html = "<p><a href=\"https://erlang.org\">https://erlang.org</a> <a href=\"https://elixir.lang\">https://elixir.lang</a></p>\n"
html = "<p><a href=\"https://erlang.org\">https://erlang.org</a>&#x20;<a href=\"https://elixir.lang\">https://elixir.lang</a></p>\n"
ast = parse_html(html)
messages = []

assert as_ast(markdown) == {:ok, ast, messages}
end

test "leading whitespace is preserved" do
markdown = "**Test**     https://www.google.com"
# This needs to be `&#x20;` instead of ` ` because Floki strips out blank text nodes
html = "<p><strong>Test</strong>&#x20;&#x20;&#x20;&#x20;&#x20;<a href=\"https://www.google.com\">https://www.google.com</a></p>\n"
ast = parse_html(html)
messages = []

Expand Down

0 comments on commit 2530202

Please sign in to comment.