@@ -65,16 +65,22 @@ def process_solidus_in_tag
65
65
66
66
# We need to consume another character to make sure it's a ">"
67
67
data = @stream . char
68
-
68
+ rv = false
69
69
if @current_token [ :type ] == :StartTag and data == ">"
70
70
@current_token [ :type ] = :EmptyTag
71
+ elsif data == :EOF
72
+ @token_queue << ( { :type => :ParseError , :data => "eof-following-solidus" } )
73
+ @state = :data_state
74
+ emit_current_token
75
+ rv = true
71
76
else
72
77
@token_queue << { :type => :ParseError , :data => "incorrectly-placed-solidus" }
73
78
end
74
79
75
80
# The character we just consumed needs to be put back on the stack so it
76
81
# doesn't get lost...
77
82
@stream . unget ( data )
83
+ rv
78
84
end
79
85
80
86
# This function returns either U+FFFD or the character based on the
@@ -117,7 +123,8 @@ def consume_number_entity(isHex)
117
123
charAsInt = ENTITIES_WINDOWS1252 [ charAsInt - 128 ]
118
124
end
119
125
120
- if 0 < charAsInt and charAsInt <= 1114111 and not ( 55296 <= charAsInt and charAsInt <= 57343 )
126
+ if 0 < charAsInt && charAsInt <= 1114111 && !( 55296 <= charAsInt && charAsInt <= 57343 ) &&
127
+ ![ 0x10FFFF ] . include? ( charAsInt ) # TODO add more entity replacements here
121
128
if String . method_defined? :force_encoding
122
129
char = charAsInt . chr ( 'utf-8' )
123
130
else
@@ -475,8 +482,9 @@ def attribute_name_state
475
482
elsif SPACE_CHARACTERS . include? data
476
483
@state = :after_attribute_name_state
477
484
elsif data == "/"
478
- process_solidus_in_tag
479
- @state = :before_attribute_name_state
485
+ if !process_solidus_in_tag
486
+ @state = :before_attribute_name_state
487
+ end
480
488
elsif data == "'" or data == '"' :
481
489
@token_queue . push ( { :type => :ParseError , :data => "invalid-character-in-attribute-name" } )
482
490
@current_token [ :data ] [ -1 ] [ 0 ] += data
@@ -520,8 +528,9 @@ def after_attribute_name_state
520
528
@current_token [ :data ] . push ( [ data , "" ] )
521
529
@state = :attribute_name_state
522
530
elsif data == "/"
523
- process_solidus_in_tag
524
- @state = :before_attribute_name_state
531
+ if !process_solidus_in_tag
532
+ @state = :before_attribute_name_state
533
+ end
525
534
else
526
535
@current_token [ :data ] . push ( [ data , "" ] )
527
536
@state = :attribute_name_state
@@ -592,7 +601,7 @@ def attribute_value_unquoted_state
592
601
if SPACE_CHARACTERS . include? data
593
602
@state = :before_attribute_name_state
594
603
elsif data == "&"
595
- process_entity_in_attribute
604
+ process_entity_in_attribute ''
596
605
elsif data == ">"
597
606
emit_current_token
598
607
elsif data == '"' || data == "'" || data == "=" :
@@ -615,12 +624,13 @@ def after_attribute_value_state
615
624
emit_current_token
616
625
@state = :data_state
617
626
elsif data == "/"
618
- process_solidus_in_tag
619
- @state = :before_attribute_name_state
627
+ if !process_solidus_in_tag
628
+ @state = :before_attribute_name_state
629
+ end
620
630
else
621
- @tokenQueue . push ( { :type => :ParseError , :data => "unexpected-character-after-attribute-value" } )
631
+ @token_queue . push ( { :type => :ParseError , :data => "unexpected-character-after-attribute-value" } )
622
632
@stream . unget ( data )
623
- @state = :before_attribute_name
633
+ @state = :before_attribute_name_state
624
634
end
625
635
true
626
636
end
@@ -629,7 +639,7 @@ def bogus_comment_state
629
639
# Make a new comment token and give it as value all the characters
630
640
# until the first > or :EOF (chars_until checks for :EOF automatically)
631
641
# and emit it.
632
- @token_queue << { :type => :Comment , :data => @stream . chars_until ( ( ">" ) ) }
642
+ @token_queue << { :type => :Comment , :data => @stream . chars_until ( [ ">" ] ) }
633
643
634
644
# Eat the character directly after the bogus comment which is either a
635
645
# ">" or an :EOF.
@@ -824,6 +834,7 @@ def after_doctype_name_state
824
834
else
825
835
@stream . unget ( char_stack )
826
836
@token_queue << { :type => :ParseError , :data => "expected-space-or-right-bracket-in-doctype" , "datavars" => { "data" => data } }
837
+ @current_token [ :correct ] = false
827
838
@state = :bogus_doctype_state
828
839
end
829
840
end
@@ -852,6 +863,7 @@ def before_doctype_public_identifier_state
852
863
@state = :data_state
853
864
else
854
865
@token_queue << { :type => :ParseError , :data => "unexpected-char-in-doctype" }
866
+ @current_token [ :correct ] = false
855
867
@state = :bogus_doctype_state
856
868
end
857
869
@@ -917,6 +929,7 @@ def after_doctype_public_identifier_state
917
929
@state = :data_state
918
930
else
919
931
@token_queue << { :type => :ParseError , :data => "eof-in-doctype" }
932
+ @current_token [ :correct ] = false
920
933
@state = :bogus_doctype_state
921
934
end
922
935
return true
@@ -943,6 +956,7 @@ def before_doctype_system_identifier_state
943
956
@state = :data_state
944
957
else
945
958
@token_queue << { :type => :ParseError , :data => "unexpected-char-in-doctype" }
959
+ @current_token [ :correct ] = false
946
960
@state = :bogus_doctype_state
947
961
end
948
962
return true
@@ -1008,15 +1022,11 @@ def after_doctype_system_identifier_state
1008
1022
1009
1023
def bogus_doctype_state
1010
1024
data = @stream . char
1011
- @current_token [ :correct ] = false
1012
1025
if data == ">"
1013
1026
@token_queue << @current_token
1014
1027
@state = :data_state
1015
1028
elsif data == :EOF
1016
- # XXX EMIT
1017
1029
@stream . unget ( data )
1018
- @token_queue << { :type => :ParseError , :data => "eof-in-doctype" }
1019
- @current_token [ :correct ] = false
1020
1030
@token_queue << @current_token
1021
1031
@state = :data_state
1022
1032
end
0 commit comments