Skip to content

Commit b14f89d

Browse files
authored
Merge pull request #163 from rails/flavorjones-ensure-utf8-encoding-from-all-sanitizers
fix: ensure LinkSanitizer returns utf-8 encoded strings
2 parents 2b0dcb5 + 47f6255 commit b14f89d

File tree

3 files changed: +25 additions, -12 deletions (lines changed)

CHANGELOG.md

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -32,12 +32,17 @@
3232

3333
*Mike Dalessio*
3434

35+
* `LinkSanitizer` always returns UTF-8 encoded strings. `SafeListSanitizer` and `FullSanitizer`
36+
already ensured this encoding.
37+
38+
*Mike Dalessio*
39+
3540
* `SafeListSanitizer` allows `time` tag and `lang` attribute by default.
3641

3742
*Mike Dalessio*
3843

39-
* `Rails::Html::XPATHS_TO_REMOVE` has been removed. It's not necessary with the existing sanitizers,
40-
and should have been a private constant all along anyway.
44+
* The constant `Rails::Html::XPATHS_TO_REMOVE` has been removed. It's not necessary with the
45+
existing sanitizers, and should have been a private constant all along anyway.
4146

4247
*Mike Dalessio*
4348

lib/rails/html/sanitizer.rb

Lines changed: 2 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -182,12 +182,6 @@ def serialize(fragment)
182182
properly_encode(fragment, encoding: "UTF-8")
183183
end
184184
end
185-
186-
module SimpleString
187-
def serialize(fragment)
188-
fragment.to_s
189-
end
190-
end
191185
end
192186
end
193187
end
@@ -242,7 +236,7 @@ class LinkSanitizer < Rails::HTML::Sanitizer
242236
include HTML::Concern::ComposedSanitize
243237
include HTML::Concern::Parser::HTML4
244238
include HTML::Concern::Scrubber::Link
245-
include HTML::Concern::Serializer::SimpleString
239+
include HTML::Concern::Serializer::UTF8Encode
246240
end
247241

248242
# == Rails::HTML4::SafeListSanitizer
@@ -352,7 +346,7 @@ class LinkSanitizer < Rails::HTML::Sanitizer
352346
include HTML::Concern::ComposedSanitize
353347
include HTML::Concern::Parser::HTML5
354348
include HTML::Concern::Scrubber::Link
355-
include HTML::Concern::Serializer::SimpleString
349+
include HTML::Concern::Serializer::UTF8Encode
356350
end
357351

358352
# == Rails::HTML5::SafeListSanitizer

test/sanitizer_test.rb

Lines changed: 16 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -174,6 +174,13 @@ def test_full_sanitize_respect_html_escaping_of_the_given_string
174174
assert_equal "omg &lt;script&gt;BOM&lt;/script&gt;", full_sanitize("omg &lt;script&gt;BOM&lt;/script&gt;")
175175
end
176176

177+
def test_sanitize_ascii_8bit_string
178+
full_sanitize("<div><a>hello</a></div>".encode("ASCII-8BIT")).tap do |sanitized|
179+
assert_equal "hello", sanitized
180+
assert_equal Encoding::UTF_8, sanitized.encoding
181+
end
182+
end
183+
177184
protected
178185
def full_sanitize(input, options = {})
179186
module_under_test::FullSanitizer.new.sanitize(input, options)
@@ -223,6 +230,13 @@ def test_strip_links_with_linkception
223230
assert_equal "Magic", link_sanitize("<a href='http://www.rubyonrails.com/'>Mag<a href='http://www.ruby-lang.org/'>ic")
224231
end
225232

233+
def test_sanitize_ascii_8bit_string
234+
link_sanitize("<div><a>hello</a></div>".encode("ASCII-8BIT")).tap do |sanitized|
235+
assert_equal "<div>hello</div>", sanitized
236+
assert_equal Encoding::UTF_8, sanitized.encoding
237+
end
238+
end
239+
226240
protected
227241
def link_sanitize(input, options = {})
228242
module_under_test::LinkSanitizer.new.sanitize(input, options)
@@ -671,8 +685,8 @@ def test_x03a_legitimate
671685
end
672686

673687
def test_sanitize_ascii_8bit_string
674-
safe_list_sanitize("<a>hello</a>".encode("ASCII-8BIT")).tap do |sanitized|
675-
assert_equal "<a>hello</a>", sanitized
688+
safe_list_sanitize("<div><a>hello</a></div>".encode("ASCII-8BIT")).tap do |sanitized|
689+
assert_equal "<div><a>hello</a></div>", sanitized
676690
assert_equal Encoding::UTF_8, sanitized.encoding
677691
end
678692
end

0 commit comments

Comments
 (0)