Skip to content

Commit af475f7

Browse files
committed
more tokenizer updates: self closing tag flag
--HG-- extra : convert_revision : svn%3Aacbfec75-9323-0410-a652-858a13e371e0/trunk%401209
1 parent 4cacfde commit af475f7

File tree

3 files changed

+28
-4
lines changed

3 files changed

+28
-4
lines changed

lib/html5/constants.rb

+2-1
Original file line numberDiff line numberDiff line change
@@ -1041,7 +1041,8 @@ def self._(str); str end
10411041
". Expected end of file."),
10421042
"unexpected-end-table-in-caption" =>
10431043
_("Unexpected end table tag in caption. Generates implied end caption."),
1044-
"end-html-in-innerhtml" => _("Unexpected html end tag in inner html mode.")
1044+
"end-html-in-innerhtml" => _("Unexpected html end tag in inner html mode."),
1045+
"expected-self-closing-tag" => _("Expected a > after the /.")
10451046
}
10461047

10471048
end

lib/html5/tokenizer.rb

+25-2
Original file line numberDiff line numberDiff line change
@@ -428,8 +428,7 @@ def tag_name_state
428428
elsif data == ">"
429429
emit_current_token
430430
elsif data == "/"
431-
process_solidus_in_tag
432-
@state = :before_attribute_name_state
431+
@state = :self_closing_tag_state
433432
else
434433
@current_token[:name] += data
435434
end
@@ -627,6 +626,11 @@ def after_attribute_value_state
627626
if !process_solidus_in_tag
628627
@state = :before_attribute_name_state
629628
end
629+
elsif data == :EOF
630+
@token_queue << {:type => :ParseError, :data => "unexpected-EOF-after-attribute-value"}
631+
emit_current_token
632+
@stream.unget(data)
633+
@state = :data_state
630634
else
631635
@token_queue.push({:type => :ParseError, :data => "unexpected-character-after-attribute-value"})
632636
@stream.unget(data)
@@ -635,6 +639,25 @@ def after_attribute_value_state
635639
true
636640
end
637641

642+
# Tokenizer state entered after a "/" has been read while consuming a tag
# name (tag_name_state switches to this state on "/").
#
# Reads one character from the stream and:
# * ">"  - marks the current token as self-closing, emits it, and
#          returns to the data state;
# * :EOF - queues an "eof-in-tag-name" parse error, pushes the EOF back
#          onto the stream and returns to the data state;
# * else - queues an "expected-self-closing-tag" parse error, pushes the
#          character back and resumes in before_attribute_name_state.
#
# Always returns true.
def self_closing_tag_state
  c = @stream.char
  case c
  when ">"
    # Set the flag BEFORE emitting so the emitted token carries it even if
    # emit_current_token copies, inspects or replaces the token.
    # NOTE(review): emit_current_token is defined outside this view; the
    # previous order (emit, then set) relied on it queueing the same Hash.
    @current_token[:self_closing] = true
    emit_current_token
    @state = :data_state
  when :EOF
    @token_queue << {:type => :ParseError, :data => "eof-in-tag-name"}
    # Put EOF back so the next state sees it too.
    @stream.unget(c)
    @state = :data_state
  else
    @token_queue << {:type => :ParseError, :data => "expected-self-closing-tag"}
    # The character is not consumed here; it is reprocessed as the possible
    # start of an attribute name.
    @stream.unget(c)
    @state = :before_attribute_name_state
  end
  true
end
660+
638661
def bogus_comment_state
639662
# Make a new comment token and give it as value all the characters
640663
# until the first > or :EOF (chars_until checks for :EOF automatically)

test/test_tokenizer.rb

+1-1
Original file line numberDiff line numberDiff line change
@@ -63,7 +63,7 @@ def tokenizer_test(data)
6363
'' ] * "\n"
6464

6565
assert_nothing_raised message do
66-
tokenizer = HTML5::HTMLTokenizer.new(data['input'])
66+
tokenizer = HTML5::HTMLTokenizer.new(data['input'], :encoding => 'utf-8')
6767

6868
tokenizer.content_model_flag = content_model_flag.to_sym
6969

0 commit comments

Comments
 (0)