Merge pull request #25 from sionide21/pure-links-whitespace

Retain whitespace when parsing pure links
RobertDober · Aug 21, 2020 · 2530202 · 2530202
2 parents d9283fd + dfac8cb
commit 2530202
Show file tree

Hide file tree

Showing 2 changed files with 26 additions and 10 deletions.
diff --git a/lib/earmark_parser/helpers/pure_link_helpers.ex b/lib/earmark_parser/helpers/pure_link_helpers.ex
@@ -7,13 +7,13 @@ defmodule EarmarkParser.Helpers.PureLinkHelpers do
   @pure_link_rgx ~r{\A(\s*)(\()?(https?://[[:alnum:]"'*@:+-_{\}()/.%\#]*)}u
   def convert_pure_link(src) do
     case Regex.run(@pure_link_rgx, src) do
-      [_match, spaces, "", link_text] -> reparse_link(String.length(spaces), link_text)
-      [_match, spaces, _, link_text]  -> remove_trailing_closing_parens(String.length(spaces), link_text) 
+      [_match, spaces, "", link_text] -> reparse_link(spaces, link_text)
+      [_match, spaces, _, link_text]  -> remove_trailing_closing_parens(spaces, link_text)
       _ -> nil
       end
   end
 
-  defp determine_ending_parens_by_count(leading_spaces_count, prefix, surplus_on_closing_parens) do
+  defp determine_ending_parens_by_count(leading_spaces, prefix, surplus_on_closing_parens) do
     graphemes = String.graphemes(prefix)
     open_parens_count = Enum.count(graphemes, &(&1 == "("))
     close_parens_count = Enum.count(graphemes, &(&1 == ")"))
@@ -22,25 +22,31 @@ defmodule EarmarkParser.Helpers.PureLinkHelpers do
     needed =
     :lists.duplicate(max(0, take), ")")
     |> Enum.join
-    {link(prefix <> needed), String.length(prefix) + leading_spaces_count + max(0,take)} 
+    link = link(prefix <> needed)
+    ast =
+      case leading_spaces do
+        "" -> link
+        _ -> [leading_spaces, link]
+      end
+    {ast, String.length(prefix) + String.length(leading_spaces) + max(0,take)}
   end
 
   @split_at_ending_parens ~r{(.*?)(\)*)\z}
-  defp remove_trailing_closing_parens(leading_spaces_count, link_text) do
+  defp remove_trailing_closing_parens(leading_spaces, link_text) do
     [_, _prefix, suffix] = Regex.run(@split_at_ending_parens, link_text)
     case suffix do
-      "" -> {"(", leading_spaces_count + 1}
+      "" -> {"(", String.length(leading_spaces) + 1}
       _  -> case convert_pure_link(betail(link_text, 1)) do
         {link, length} -> {["(", link, ")"], length + 2}
         _ -> nil
       end
     end
   end
 
-  defp reparse_link(leading_spaces_count, link_text) do
+  defp reparse_link(leading_spaces, link_text) do
     [_, prefix, suffix] = Regex.run(@split_at_ending_parens, link_text)
     nof_closing_parens = String.length(suffix)
-    determine_ending_parens_by_count(leading_spaces_count, prefix, nof_closing_parens)
+    determine_ending_parens_by_count(leading_spaces, prefix, nof_closing_parens)
   end
 
   defp link(text), do: render_link(text, text)

diff --git a/test/acceptance/ast/links_images/pure_links_test.exs b/test/acceptance/ast/links_images/pure_links_test.exs
@@ -18,7 +18,7 @@ defmodule Acceptance.Ast.LinksImages.PureLinksTest do
   describe "enabled pure links" do
     test "two in a row" do
       markdown = "https://github.com/pragdave/earmark https://github.com/RobertDober/extractly"
-      html = "<p><a href=\"https://github.com/pragdave/earmark\">https://github.com/pragdave/earmark</a> <a href=\"https://github.com/RobertDober/extractly\">https://github.com/RobertDober/extractly</a></p>\n"
+      html = "<p><a href=\"https://github.com/pragdave/earmark\">https://github.com/pragdave/earmark</a>&#x20;<a href=\"https://github.com/RobertDober/extractly\">https://github.com/RobertDober/extractly</a></p>\n"
       ast      = parse_html(html)
       messages = []
 
@@ -54,7 +54,17 @@ defmodule Acceptance.Ast.LinksImages.PureLinksTest do
 
     test "correct mix" do
       markdown = "[https://erlang.org](https://erlang.org) https://elixir.lang"
-      html = "<p><a href=\"https://erlang.org\">https://erlang.org</a> <a href=\"https://elixir.lang\">https://elixir.lang</a></p>\n"
+      html = "<p><a href=\"https://erlang.org\">https://erlang.org</a>&#x20;<a href=\"https://elixir.lang\">https://elixir.lang</a></p>\n"
+      ast      = parse_html(html)
+      messages = []
+
+      assert as_ast(markdown) == {:ok, ast, messages}
+    end
+
+    test "leading whitespace is preserved" do
+      markdown = "**Test**     https://www.google.com"
+      # This needs to be `&#x20;` instead of ` ` because Floki strips out blank text nodes
+      html = "<p><strong>Test</strong>&#x20;&#x20;&#x20;&#x20;&#x20;<a href=\"https://www.google.com\">https://www.google.com</a></p>\n"
       ast      = parse_html(html)
       messages = []