Skip to content

Commit

Permalink
Introduce an acts_as_loofah method
Browse files Browse the repository at this point in the history
to make Nokogiri instances act like Loofah instances.

These classes have a new instance method, `#acts_as_loofah`:

- Nokogiri::XML::Document
- Nokogiri::XML::DocumentFragment
- Nokogiri::HTML4::Document
- Nokogiri::HTML4::DocumentFragment
- Nokogiri::HTML5::Document
- Nokogiri::HTML5::DocumentFragment

This method extends the Nokogiri object so that it quacks like the
corresponding Loofah object.

Note that this method will also extend any existing child element
objects, just as if those objects were created as children of a Loofah
document.
  • Loading branch information
flavorjones committed Jan 22, 2025
1 parent 8599060 commit a397eda
Show file tree
Hide file tree
Showing 6 changed files with 134 additions and 3 deletions.
10 changes: 8 additions & 2 deletions lib/loofah/concerns.rb
Original file line number Diff line number Diff line change
Expand Up @@ -123,10 +123,16 @@ def to_text(options = {})
end

# Mixin for document classes that registers Loofah's scrub mixins as
# Nokogiri decorators on the document.
module DocumentDecorator # :nodoc:
  class << self
    # Appends ScrubBehavior::Node and ScrubBehavior::NodeSet to the decorator
    # lists that +object+ keeps for Nokogiri::XML::Node and
    # Nokogiri::XML::NodeSet respectively.
    #
    # +object+ must respond to +decorators(key)+ and return a collection that
    # supports +<<+. Returns the NodeSet decorator list (the value of the
    # last +<<+).
    def decorate(object)
      object.decorators(Nokogiri::XML::Node) << ScrubBehavior::Node
      object.decorators(Nokogiri::XML::NodeSet) << ScrubBehavior::NodeSet
    end
  end

  # Runs the including class's own initializer, then registers the scrub
  # decorators exactly once by delegating to DocumentDecorator.decorate.
  # (Registration used to be spelled out inline here as well, which added
  # every decorator twice.)
  def initialize(*args, &block)
    super
    DocumentDecorator.decorate(self)
  end
end

Expand Down
10 changes: 10 additions & 0 deletions lib/loofah/html4/document.rb
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,16 @@ class Document < Nokogiri::HTML4::Document
include Loofah::DocumentDecorator
include Loofah::TextBehavior
include Loofah::HtmlDocumentBehavior

# Adds #acts_as_loofah to HTML4 documents.
module NokogiriExtender
  # Invokes the inherited #acts_as_loofah, then mixes Loofah's text and
  # HTML-document behavior into this instance only, via its singleton
  # class. Other instances of the same class are left untouched.
  #
  # Returns the receiver's singleton class.
  def acts_as_loofah
    super

    eigenclass = singleton_class
    eigenclass.include(Loofah::TextBehavior)
    eigenclass.include(Loofah::HtmlDocumentBehavior)
  end
end
end
end
end

# Mix the extender into Nokogiri's own class so that plain
# Nokogiri::HTML4::Document instances gain the #acts_as_loofah method.
Nokogiri::HTML4::Document.include(Loofah::HTML4::Document::NokogiriExtender)
10 changes: 10 additions & 0 deletions lib/loofah/html4/document_fragment.rb
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,16 @@ module HTML4 # :nodoc:
class DocumentFragment < Nokogiri::HTML4::DocumentFragment
include Loofah::TextBehavior
include Loofah::HtmlFragmentBehavior

# Adds #acts_as_loofah to HTML4 document fragments.
module NokogiriExtender
  # Invokes the inherited #acts_as_loofah, then mixes Loofah's text and
  # HTML-fragment behavior into this instance only, via its singleton
  # class. Other instances of the same class are left untouched.
  #
  # Returns the receiver's singleton class.
  def acts_as_loofah
    super

    eigenclass = singleton_class
    eigenclass.include(Loofah::TextBehavior)
    eigenclass.include(Loofah::HtmlFragmentBehavior)
  end
end
end
end
end

# Mix the extender into Nokogiri's own class so that plain
# Nokogiri::HTML4::DocumentFragment instances gain the #acts_as_loofah method.
Nokogiri::HTML4::DocumentFragment.include(Loofah::HTML4::DocumentFragment::NokogiriExtender)
23 changes: 22 additions & 1 deletion lib/loofah/xml/document.rb
Original file line number Diff line number Diff line change
Expand Up @@ -5,11 +5,32 @@ module XML # :nodoc:
#
# Subclass of Nokogiri::XML::Document.
#
# See Loofah::ScrubBehavior and Loofah::DocumentDecorator for additional methods.
# See Loofah::ScrubBehavior for additional methods.
#
class Document < Nokogiri::XML::Document
include Loofah::ScrubBehavior::Node
include Loofah::DocumentDecorator

# Adds #acts_as_loofah to XML documents.
module NokogiriExtender
  # Makes this document quack like a Loofah document:
  #
  # 1. mixes Loofah::ScrubBehavior::Node into this instance's singleton class,
  # 2. registers Loofah's decorators on the document,
  # 3. applies the decorators to nodes that already exist.
  #
  # Returns whatever #decorate_existing returns.
  def acts_as_loofah
    singleton_class.include(Loofah::ScrubBehavior::Node)
    Loofah::DocumentDecorator.decorate(self)
    decorate_existing
  end

  # Applies the document's decorators to nodes that were created before the
  # decorators were registered. Does nothing (returns nil) when @decorators
  # is not set.
  #
  # On JRuby the tree is walked with #traverse; on other platforms the
  # entries of @node_cache are visited instead.
  # NOTE(review): @node_cache is presumably Nokogiri's internal list of
  # already-wrapped node objects on CRuby -- confirm against Nokogiri.
  #
  # TODO: this should be upstreamed into Nokogiri
  def decorate_existing # :nodoc:
    return unless @decorators

    return traverse { |existing| decorate(existing) } if Nokogiri.jruby?

    @node_cache.each { |existing| decorate(existing) }
  end
end
end
end
end

# Mix the extender into Nokogiri's own class so that plain
# Nokogiri::XML::Document instances gain the #acts_as_loofah method.
Nokogiri::XML::Document.include(Loofah::XML::Document::NokogiriExtender)
17 changes: 17 additions & 0 deletions lib/loofah/xml/document_fragment.rb
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,23 @@ def parse(tags)
new(doc, tags)
end
end

# Adds #acts_as_loofah to XML document fragments.
module NokogiriExtender
  # Makes the fragment's owning document act like a Loofah document, then
  # decorates the fragment's own existing nodes where that is still needed.
  #
  # Returns whatever #decorate_existing returns.
  def acts_as_loofah
    document.acts_as_loofah
    decorate_existing
  end

  # Applies the owning document's decorators to every node reached by
  # #traverse. Only runs on JRuby, and only when the document has
  # @decorators set; in every other case it returns nil without touching
  # anything.
  # NOTE(review): presumably on other platforms document.acts_as_loofah has
  # already covered the fragment's nodes -- confirm.
  #
  # TODO: this should be upstreamed into Nokogiri
  def decorate_existing # :nodoc:
    return unless Nokogiri.jruby? && document.instance_variable_get(:@decorators)

    traverse { |existing| document.decorate(existing) }
  end
end
end
end
end

# Mix the extender into Nokogiri's own class so that plain
# Nokogiri::XML::DocumentFragment instances gain the #acts_as_loofah method.
Nokogiri::XML::DocumentFragment.include(Loofah::XML::DocumentFragment::NokogiriExtender)
67 changes: 67 additions & 0 deletions test/unit/test_acts_as_loofah.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
# frozen_string_literal: true

require "helper"

# Checks that calling #acts_as_loofah on plain Nokogiri documents and
# document fragments makes them (and their nodes) respond to Loofah's
# scrub API, without affecting other instances.
class UnitTestActsAsLoofah < Loofah::TestCase
# Nokogiri namespaces under test; HTML5 is included only when it is defined.
SUBJECTS = [Nokogiri::XML, Nokogiri::HTML4, defined?(Nokogiri::HTML5) && Nokogiri::HTML5].compact

# The same two specs are generated once per namespace.
SUBJECTS.each do |subject|
describe subject do
it "Document act like Loofah" do
ndoc = subject::Document.parse("<html><body><div>hello</div><span>hello</span><script>alert(1)</script></body></html>")
# Fetched before acts_as_loofah so it stands in for an "existing" node object.
node = ndoc.at_css("div")

# method presence
refute_respond_to(ndoc, :scrub!)
refute_respond_to(node, :scrub!)

ndoc.acts_as_loofah

assert_respond_to(ndoc, :scrub!, "#{subject}::Document should be extended")
assert_respond_to(ndoc.at_css("span"), :scrub!, "New child elements should be extended")
assert_respond_to(node, :scrub!, "Existing child elements should be extended")

# A freshly parsed document must be unaffected: the extension is per instance.
refute_respond_to(subject::Document.parse("<div>"), :scrub!, "Other instances should not be extended")

# scrub behavior
ndoc.scrub!(:prune)

refute_includes(ndoc.to_html, "script")

# other concerns
# Only the HTML namespaces are expected to gain the text/HTML behaviors.
if subject.name.include?("HTML")
assert_includes(ndoc.singleton_class.ancestors, Loofah::TextBehavior)
assert_includes(ndoc.singleton_class.ancestors, Loofah::HtmlDocumentBehavior)
end
end

it "DocumentFragment act like Loofah" do
nfrag = subject::DocumentFragment.parse("<div>hello</div><span>hello</span><script>alert(1)</script>")
# Fetched before acts_as_loofah so it stands in for an "existing" node object.
node = nfrag.at_css("div")

# method presence
refute_respond_to(nfrag, :scrub!)
refute_respond_to(node, :scrub!)

nfrag.acts_as_loofah

assert_respond_to(nfrag, :scrub!, "#{subject}::DocumentFragment should be extended")
assert_respond_to(nfrag.at_css("span"), :scrub!, "New child elements should be extended")
assert_respond_to(node, :scrub!, "Existing child elements should be extended")

# A freshly parsed fragment must be unaffected: the extension is per instance.
refute_respond_to(subject::DocumentFragment.parse("<div>"), :scrub!, "Other instances should not be extended")

# scrub behavior
nfrag.scrub!(:prune)

refute_includes(nfrag.to_html, "script")

# other concerns
# Only the HTML namespaces are expected to gain the text/HTML behaviors.
if subject.name.include?("HTML")
assert_includes(nfrag.singleton_class.ancestors, Loofah::TextBehavior)
assert_includes(nfrag.singleton_class.ancestors, Loofah::HtmlFragmentBehavior)
end
end
end
end
end

0 comments on commit a397eda

Please sign in to comment.