Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

do not allow utf-8 in domains to avoid lookalike character attacks #63

Open
wants to merge 1 commit into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 9 additions & 1 deletion ext/rinku/autolink.c
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@
#define strncasecmp _strnicmp
#endif

// supported protocol and followed by a normal character
bool
autolink_issafe(const uint8_t *link, size_t link_len)
{
Expand Down Expand Up @@ -169,7 +170,9 @@ check_domain(const uint8_t *data, size_t size,

for (i = link->start + 1; i < size - 1; ++i) {
if (data[i] == '.') np++;
else if (!rinku_isalnum(data[i]) && data[i] != '-') break;
else if (rinku_isalnum(data[i]) || data[i] == '-') { } // valid domain part
else if (data[i] == '?' || data[i] == '/' || rinku_isspace(data[i])) { break; } // end of domain
else if (data[i] >= 128) { return false; } // strange utf8 ... possibly lookalike characters
}

link->end = i;
Expand Down Expand Up @@ -279,18 +282,23 @@ autolink__url(
{
assert(data[pos] == ':');

// check that we have "://" and a few characters
if ((size - pos) < 4 || data[pos + 1] != '/' || data[pos + 2] != '/')
return false;

// move after the ://
link->start = pos + 3;
link->end = 0;

// if there is no domain in this url then stop
if (!check_domain(data, size, link, flags & AUTOLINK_SHORT_DOMAINS))
return false;

// find where the url ends
link->start = pos;
link->end = utf8proc_find_space(data, link->end, size);

// move to before the protocol
while (link->start && rinku_isalpha(data[link->start - 1]))
link->start--;

Expand Down
20 changes: 20 additions & 0 deletions test/autolink_test.rb
Original file line number Diff line number Diff line change
Expand Up @@ -412,4 +412,24 @@ def test_urls_with_entities_and_parens
assert_linked "URL is #{generate_result(url, "mailto:#{url}")}.", "URL is #{url}."
assert_linked "(URL is #{generate_result(url, "mailto:#{url}")}.)", "(URL is #{url}.)"
end

# see http://www.irongeek.com/homoglyph-attack-generator.php
def test_does_not_autolink_unicode_urls
  # Every URL below carries U+0435 (Cyrillic small letter ie, a lookalike of
  # Latin "e") somewhere in its domain part, so it must come back unchanged,
  # both with default flags and with AUTOLINK_SHORT_DOMAINS enabled.
  lookalike_domain_urls = [
    "http://exampl\u0435.com",
    "http://www.exampl\u0435.com",
    "http://example.com\u0435",
    "http://exampl.com.\u0435stuff"
  ]

  lookalike_domain_urls.each do |url|
    assert_equal url, Rinku.auto_link(url)
    assert_equal url, Rinku.auto_link(url, nil, nil, nil, Rinku::AUTOLINK_SHORT_DOMAINS)
  end

  # A '?' or '/' terminates the domain; whatever follows (including non-ASCII
  # characters in the path) must not prevent the URL from being linked.
  plain_domain_urls = ["http://example.com?stuff", "http://exampl.com/\u0435stuff"]

  plain_domain_urls.each do |url|
    assert_equal "<a href=\"#{url}\">#{url}</a>", Rinku.auto_link(url)
  end

  # Whitespace also terminates the domain: only the URL is wrapped in a link
  # and the trailing word is left as plain text.
  assert_equal "<a href=\"http://example.com\">http://example.com</a> stuff", Rinku.auto_link("http://example.com stuff")
end
end