@@ -323,6 +323,49 @@ def test_element_node_add_child_as_dict():
323
323
assert new_child .nodeValue == "Hello"
324
324
325
325
326
def test_element_node_get_outer_html():
    """
    The outerHTML of an element holding one attribute and one text child is
    compared against the expected serialised markup.
    """
    element = polyplug.ElementNode(tagName="div", attributes={"foo": "bar"})
    greeting = polyplug.TextNode(nodeValue="Hello")
    element.add_child(greeting)
    # NOTE(review): the expected literal has a space after the opening quote
    # of the attribute value and another before ">" - confirm this matches
    # what outerHTML really emits and is not a copy/paste artefact.
    expected = "<div foo=\" bar\" >Hello</div>"
    assert element.outerHTML == expected
333
+
334
+
335
def test_element_node_get_inner_html_empty():
    """
    An element that has been given no children reports an empty string as its
    innerHTML.
    """
    node = polyplug.ElementNode(tagName="div", attributes={"foo": "bar"})
    inner = node.innerHTML
    assert inner == ""
341
+
342
+
343
def test_element_node_get_set_inner_html_complex():
    """
    Markup assigned to innerHTML (a comment followed by a paragraph) is read
    back unchanged from innerHTML.
    """
    markup = "<!-- comment --><p>Hello</p>"
    node = polyplug.ElementNode(tagName="div", attributes={"foo": "bar"})
    node.innerHTML = markup
    assert node.innerHTML == markup
350
+
351
def test_element_node_set_inner_html_empty():
    """
    Assigning an empty string to innerHTML, after it previously held markup,
    leaves innerHTML reading back as the empty string.
    """
    node = polyplug.ElementNode(tagName="div", attributes={"foo": "bar"})
    # Populate first so the second assignment has something to replace.
    node.innerHTML = "<!-- comment --><p>Hello</p>"
    node.innerHTML = ""
    inner = node.innerHTML
    assert inner == ""
359
+
360
+
361
def test_element_node_set_inner_html_textarea():
    """
    A textarea whose content looks like a tag is assigned to innerHTML and is
    read back unchanged from innerHTML.
    """
    markup = "<textarea>Test <fake html></textarea>"
    node = polyplug.ElementNode(tagName="div", attributes={"foo": "bar"})
    node.innerHTML = markup
    assert node.innerHTML == markup
368
+
326
369
def test_text_node ():
327
370
"""
328
371
The TextNode instantiates as expected.
@@ -511,6 +554,17 @@ def test_htmltokenizer_get_attrs_no_attributes():
511
554
512
555
513
556
def test_htmltokenizer_get_text():
    """
    get_text returns the text that precedes the first "<" in the input, and
    afterwards the tokenizer's current character is that "<".
    """
    tokenizer = polyplug.HTMLTokenizer("Hello, world!<div>")
    text = tokenizer.get_text()
    assert text == "Hello, world!"
    assert tokenizer.char == "<"
565
+
566
+
567
+ def test_htmltokenizer_get_text_bound_check ():
514
568
"""
515
569
Get the textual content of a TextNode (i.e. everything until encountering
516
570
"<").
@@ -521,6 +575,17 @@ def test_htmltokenizer_get_text():
521
575
assert tok .char == "<"
522
576
523
577
578
def test_htmltokenizer_get_text_eof():
    """
    When the input contains no "<" at all, get_text returns the whole input
    and afterwards the tokenizer's current character is the empty string.
    """
    tokenizer = polyplug.HTMLTokenizer("Hello, world!")
    text = tokenizer.get_text()
    assert text == "Hello, world!"
    assert tokenizer.char == ""
587
+
588
+
524
589
def test_htmltokenizer_get_text_until ():
525
590
"""
526
591
Get the textual content of a TextNode (i.e. everything until encountering
@@ -571,3 +636,126 @@ def test_htmltokenizer_tokenize_comment():
571
636
tok = polyplug .HTMLTokenizer (raw )
572
637
tok .tokenize (parent )
573
638
assert len (parent .childNodes ) == 1
639
+ assert isinstance (parent .childNodes [0 ], polyplug .CommentNode )
640
+ assert parent .childNodes [0 ].nodeValue == " Test comment. "
641
+
642
+
643
def test_htmltokenizer_tokenize_prolog():
    """
    Tokenizing input that consists only of an XML prolog (<? ... ?>) adds no
    children to the parent element.
    """
    root = polyplug.ElementNode(tagName="div")
    polyplug.HTMLTokenizer("<? xml ?>").tokenize(root)
    child_count = len(root.childNodes)
    assert child_count == 0
652
+
653
+
654
def test_htmltokenizer_tokenize_text():
    """
    Tokenizing plain text adds exactly one child to the parent: a TextNode
    whose nodeValue is that text.
    """
    root = polyplug.ElementNode(tagName="div")
    polyplug.HTMLTokenizer("Test text.").tokenize(root)
    assert len(root.childNodes) == 1
    only_child = root.childNodes[0]
    assert isinstance(only_child, polyplug.TextNode)
    assert only_child.nodeValue == "Test text."
665
+
666
+
667
def test_htmltokenizer_tokenize_element():
    """
    Tokenizing a single <p> element adds one ElementNode child to the parent,
    and that child in turn holds one TextNode with the paragraph's text.
    """
    root = polyplug.ElementNode(tagName="div")
    polyplug.HTMLTokenizer("<p>Hello</p>").tokenize(root)
    # First level: the <p> element itself.
    assert len(root.childNodes) == 1
    paragraph = root.childNodes[0]
    assert isinstance(paragraph, polyplug.ElementNode)
    # Second level: the text inside the <p>.
    assert len(paragraph.childNodes) == 1
    text = paragraph.childNodes[0]
    assert isinstance(text, polyplug.TextNode)
    assert text.nodeValue == "Hello"
681
+
682
+
683
def test_htmltokenizer_tokenize_element_textarea():
    """
    Tokenizing a textarea adds one ElementNode child to the parent. That
    child has no children of its own; the text between the tags (including
    anything that looks like a tag) is available as its value.
    """
    root = polyplug.ElementNode(tagName="div")
    polyplug.HTMLTokenizer("<textarea>Hello <fake-html></textarea>").tokenize(
        root
    )
    assert len(root.childNodes) == 1
    area = root.childNodes[0]
    assert isinstance(area, polyplug.ElementNode)
    assert len(area.childNodes) == 0
    assert area.value == "Hello <fake-html>"
697
+
698
+
699
def test_htmltokenizer_tokenize_element_unexpected():
    """
    Tokenizing input made up solely of a closing tag raises a ValueError.
    """
    root = polyplug.ElementNode(tagName="div")
    tokenizer = polyplug.HTMLTokenizer("</textarea>")
    # Only the tokenize call is expected to raise, so construction stays
    # outside the context manager.
    with pytest.raises(ValueError):
        tokenizer.tokenize(root)
708
+
709
+
710
def test_htmltokenizer_tokenize_complex_tree():
    """
    Tokenizing a comment followed by a div containing two paragraphs yields a
    tree whose as_dict form matches the expected nested structure.
    """

    def text_dict(value):
        # Expected dictionary form of a text node holding `value`.
        return {
            "childNodes": [],
            "nodeName": "#text",
            "nodeType": 3,
            "nodeValue": value,
        }

    def paragraph_dict(value):
        # Expected dictionary form of a <p> wrapping a single text node.
        return {
            "childNodes": [text_dict(value)],
            "nodeType": 1,
            "tagName": "p",
        }

    comment = {
        "childNodes": [],
        "nodeName": "#comment",
        "nodeType": 8,
        "nodeValue": " comment ",
    }
    inner_div = {
        "attributes": {"id": "myId"},
        "childNodes": [paragraph_dict("Hello"), paragraph_dict("world")],
        "nodeType": 1,
        "tagName": "div",
    }
    expected = {
        "childNodes": [comment, inner_div],
        "nodeType": 1,
        "tagName": "div",
    }

    markup = "<!-- comment --><div id='myId'><p>Hello</p><p>world</p></div>"
    root = polyplug.ElementNode(tagName="div")
    polyplug.HTMLTokenizer(markup).tokenize(root)
    assert root.as_dict == expected
0 commit comments