|
context("large-test")

# Runs a long list of CSS selectors through the generic and HTML translators
# against one fixture document and checks which element ids each one selects.
test_that("selection works correctly on a large barrage of tests", {
  # Fixture document; most elements carry an id so that matches can be
  # reported by name.
  HTML_IDS <- paste0(
    c(
      "<html id=\"html\"><head>",
      " <link id=\"link-href\" href=\"foo\" />",
      " <link id=\"link-nohref\" />",
      "</head><body>",
      "<div id=\"outer-div\">",
      " <a id=\"name-anchor\" name=\"foo\"></a>",
      " <a id=\"tag-anchor\" rel=\"tag\" href=\"http://localhost/foo\">link</a>",
      " <a id=\"nofollow-anchor\" rel=\"nofollow\" href=\"https://example.org\">",
      " link</a>",
      " <ol id=\"first-ol\" class=\"a b c\">",
      " <li id=\"first-li\">content</li>",
      " <li id=\"second-li\" lang=\"En-us\">",
      " <div id=\"li-div\">",
      " </div>",
      " </li>",
      " <li id=\"third-li\" class=\"ab c\"></li>",
      " <li id=\"fourth-li\" class=\"ab",
      "c\"></li>",
      " <li id=\"fifth-li\"></li>",
      " <li id=\"sixth-li\"></li>",
      " <li id=\"seventh-li\"> </li>",
      " </ol>",
      " <p id=\"paragraph\">",
      " <b id=\"p-b\">hi</b> <em id=\"p-em\">there</em>",
      " <b id=\"p-b2\">guy</b>",
      " <input type=\"checkbox\" id=\"checkbox-unchecked\" />",
      " <input type=\"checkbox\" id=\"checkbox-disabled\" disabled=\"\" />",
      " <input type=\"text\" id=\"text-checked\" checked=\"checked\" />",
      " <input type=\"hidden\" />",
      " <input type=\"hidden\" disabled=\"disabled\" />",
      " <input type=\"checkbox\" id=\"checkbox-checked\" checked=\"checked\" />",
      " <input type=\"checkbox\" id=\"checkbox-disabled-checked\"",
      " disabled=\"disabled\" checked=\"checked\" />",
      " <fieldset id=\"fieldset\" disabled=\"disabled\">",
      " <input type=\"checkbox\" id=\"checkbox-fieldset-disabled\" />",
      " <input type=\"hidden\" />",
      " </fieldset>",
      " </p>",
      " <ol id=\"second-ol\">",
      " </ol>",
      " <map name=\"dummymap\">",
      " <area shape=\"circle\" coords=\"200,250,25\" href=\"foo.html\" id=\"area-href\" />",
      " <area shape=\"default\" id=\"area-nohref\" />",
      " </map>",
      "</div>",
      "<div id=\"foobar-div\" foobar=\"ab bc",
      "cde\"><span id=\"foobar-span\"></span></div>",
      "</body></html>"
    ),
    collapse = "\n"
  )

  library(XML)
  document <- xmlRoot(xmlParse(HTML_IDS))
  gt <- GenericTranslator$new()
  ht <- HTMLTranslator$new()

  # Translate `selector` to XPath (HTML translator when `html_only` is TRUE,
  # generic translator otherwise), evaluate it against `document`, and return
  # the ids of the matched nodes in match order.
  #
  # Returns NULL when nothing matches. A matched node without an `id`
  # attribute is reported as "nil".
  select_ids <- function(selector, html_only) {
    translator <- if (html_only) ht else gt
    items <- getNodeSet(document, translator$css_to_xpath(selector))
    n <- length(items)
    if (n == 0L) {
      return(NULL)
    }
    vapply(
      seq_len(n),
      function(i) {
        attrs <- xmlAttrs(items[[i]])
        # `attrs["id"]` would give NA (not NULL) for a node that has other
        # attributes but no id, so look the name up explicitly.
        if ("id" %in% names(attrs)) attrs[["id"]] else "nil"
      },
      character(1)
    )
  }

  # Select ids for `main` and check that every selector in `selectors`
  # (a character vector, possibly NULL) selects exactly the same ids.
  #
  # Returns the ids selected by `main`; stops with an error on any mismatch,
  # including the case where one selector matches nothing and the other does.
  pcss <- function(main, selectors = NULL, html_only = FALSE) {
    result <- select_ids(main, html_only)
    for (selector in selectors) {
      # identical() yields a single TRUE/FALSE regardless of lengths; an
      # elementwise `!=` inside `&&` is an error for length > 1 on R >= 4.3.
      if (!identical(select_ids(selector, html_only = html_only), result)) {
        stop("Difference between results of selectors")
      }
    }
    result
  }

  all_ids <- pcss('*')
  expect_equal(
    all_ids[1:6],
    c('html', 'nil', 'link-href', 'link-nohref', 'nil', 'outer-div')
  )
  expect_equal(tail(all_ids, 1), 'foobar-span')
  expect_equal(pcss('div'), c('outer-div', 'li-div', 'foobar-div'))
  # case-insensitive in HTML
  expect_equal(
    pcss('DIV', html_only = TRUE),
    c('outer-div', 'li-div', 'foobar-div')
  )
  expect_equal(pcss('div div'), 'li-div')
  expect_equal(pcss('div, div div'), c('outer-div', 'li-div', 'foobar-div'))
  expect_equal(pcss('a[name]'), 'name-anchor')
  # case-insensitive in HTML
  expect_equal(pcss('a[NAme]', html_only = TRUE), 'name-anchor')
  expect_equal(pcss('a[rel]'), c('tag-anchor', 'nofollow-anchor'))
  expect_equal(pcss('a[rel="tag"]'), 'tag-anchor')
  expect_equal(pcss('a[href*="localhost"]'), 'tag-anchor')
  expect_equal(pcss('a[href*=""]'), NULL)
  expect_equal(pcss('a[href^="http"]'), c('tag-anchor', 'nofollow-anchor'))
  expect_equal(pcss('a[href^="http:"]'), 'tag-anchor')
  expect_equal(pcss('a[href^=""]'), NULL)
  expect_equal(pcss('a[href$="org"]'), 'nofollow-anchor')
  expect_equal(pcss('a[href$=""]'), NULL)
  expect_equal(pcss('div[foobar~="bc"]', 'div[foobar~="cde"]'), 'foobar-div')
  expect_equal(
    pcss('[foobar~="ab bc"]', c('[foobar~=""]', '[foobar~=" \t"]')),
    NULL
  )
  expect_equal(pcss('div[foobar~="cd"]'), NULL)
  expect_equal(pcss('*[lang|="En"]', '[lang|="En-us"]'), 'second-li')
  # Attribute values are case sensitive
  expect_equal(pcss('*[lang|="en"]', '[lang|="en-US"]'), NULL)
  expect_equal(pcss('*[lang|="e"]'), NULL)
  # ... :lang() is not.
  expect_equal(
    pcss(':lang("EN")', '*:lang(en-US)', html_only = TRUE),
    c('second-li', 'li-div')
  )
  expect_equal(pcss(':lang("e")', html_only = TRUE), NULL)
  expect_equal(pcss('li:nth-child(-n)'), NULL)
  expect_equal(
    pcss('li:nth-child(n)'),
    c('first-li', 'second-li', 'third-li', 'fourth-li', 'fifth-li', 'sixth-li', 'seventh-li')
  )
  expect_equal(pcss('li:nth-child(3)'), 'third-li')
  expect_equal(pcss('li:nth-child(10)'), NULL)
  expect_equal(
    pcss('li:nth-child(2n)', c('li:nth-child(even)', 'li:nth-child(2n+0)')),
    c('second-li', 'fourth-li', 'sixth-li')
  )
  expect_equal(
    pcss('li:nth-child(+2n+1)', 'li:nth-child(odd)'),
    c('first-li', 'third-li', 'fifth-li', 'seventh-li')
  )
  expect_equal(pcss('li:nth-child(2n+4)'), c('fourth-li', 'sixth-li'))
  expect_equal(
    pcss('li:nth-child(3n+1)'),
    c('first-li', 'fourth-li', 'seventh-li')
  )
  expect_equal(
    pcss('li:nth-child(-n+3)'),
    c('first-li', 'second-li', 'third-li')
  )
  expect_equal(pcss('li:nth-child(-2n+4)'), c('second-li', 'fourth-li'))
  expect_equal(pcss('li:nth-last-child(0)'), NULL)
  expect_equal(pcss('li:nth-last-child(1)'), 'seventh-li')
  expect_equal(
    pcss('li:nth-last-child(2n)', 'li:nth-last-child(even)'),
    c('second-li', 'fourth-li', 'sixth-li')
  )
  expect_equal(
    pcss('li:nth-last-child(2n+2)'),
    c('second-li', 'fourth-li', 'sixth-li')
  )
  expect_equal(pcss('ol:first-of-type'), 'first-ol')
  expect_equal(pcss('ol:nth-child(1)'), NULL)
  expect_equal(pcss('ol:nth-of-type(2)'), 'second-ol')
  expect_equal(pcss('ol:nth-last-of-type(1)'), 'second-ol')
  expect_equal(pcss('span:only-child'), 'foobar-span')
  expect_equal(pcss('li div:only-child'), 'li-div')
  expect_equal(pcss('div *:only-child'), c('li-div', 'foobar-span'))
  # Disabled check, kept as a note:
  # self.assertRaises(ExpressionError, pcss, 'p *:only-of-type')
  expect_equal(pcss('p:only-of-type'), 'paragraph')
  expect_equal(pcss('a:empty', 'a:EMpty'), 'name-anchor')
  expect_equal(
    pcss('li:empty'),
    c('third-li', 'fourth-li', 'fifth-li', 'sixth-li')
  )
  expect_equal(pcss(':root', 'html:root'), 'html')
  expect_equal(pcss('li:root', '* :root'), NULL)
  expect_equal(
    pcss('*:contains("link")', ':CONtains("link")'),
    c('html', 'nil', 'outer-div', 'tag-anchor', 'nofollow-anchor')
  )
  expect_equal(pcss('*:contains("LInk")'), NULL) # case sensitive
  expect_equal(
    pcss('*:contains("e")'),
    c('html', 'nil', 'outer-div', 'first-ol', 'first-li', 'paragraph', 'p-em')
  )
  expect_equal(pcss('*:contains("E")'), NULL) # case-sensitive
  expect_equal(pcss('.a', c('.b', '*.a', 'ol.a')), 'first-ol')
  expect_equal(pcss('.c', '*.c'), c('first-ol', 'third-li', 'fourth-li'))
  expect_equal(
    pcss('ol *.c', c('ol li.c', 'li ~ li.c', 'ol > li.c')),
    c('third-li', 'fourth-li')
  )
  expect_equal(pcss('#first-li', c('li#first-li', '*#first-li')), 'first-li')
  expect_equal(pcss('li div', c('li > div', 'div div')), 'li-div')
  expect_equal(pcss('div > div'), NULL)
  expect_equal(pcss('div>.c', 'div > .c'), 'first-ol')
  expect_equal(pcss('div + div'), 'foobar-div')
  expect_equal(pcss('a ~ a'), c('tag-anchor', 'nofollow-anchor'))
  expect_equal(pcss('a[rel="tag"] ~ a'), 'nofollow-anchor')
  expect_equal(pcss('ol#first-ol li:last-child'), 'seventh-li')
  expect_equal(pcss('ol#first-ol *:last-child'), c('li-div', 'seventh-li'))
  expect_equal(pcss('#outer-div:first-child'), 'outer-div')
  expect_equal(
    pcss('#outer-div :first-child'),
    c('name-anchor', 'first-li', 'li-div', 'p-b', 'checkbox-fieldset-disabled', 'area-href')
  )
  expect_equal(pcss('a[href]'), c('tag-anchor', 'nofollow-anchor'))
  expect_equal(pcss(':not(*)'), NULL)
  expect_equal(pcss('a:not([href])'), 'name-anchor')
  expect_equal(
    pcss('ol :Not(li[class])'),
    c('first-li', 'second-li', 'li-div', 'fifth-li', 'sixth-li', 'seventh-li')
  )
  # Invalid characters in XPath element names, should not crash
  expect_equal(pcss('di\ua0v', 'div\\['), NULL)
  expect_equal(pcss('[h\ua0ref]', '[h\\]ref]'), NULL)

  ## HTML-specific
  expect_equal(
    pcss(':link', html_only = TRUE),
    c('link-href', 'tag-anchor', 'nofollow-anchor', 'area-href')
  )
  expect_equal(pcss(':visited', html_only = TRUE), NULL)
  expect_equal(
    pcss(':enabled', html_only = TRUE),
    c('link-href', 'tag-anchor', 'nofollow-anchor', 'checkbox-unchecked', 'text-checked', 'checkbox-checked', 'area-href')
  )
  expect_equal(
    pcss(':disabled', html_only = TRUE),
    c('checkbox-disabled', 'checkbox-disabled-checked', 'fieldset', 'checkbox-fieldset-disabled')
  )
  expect_equal(
    pcss(':checked', html_only = TRUE),
    c('checkbox-checked', 'checkbox-disabled-checked')
  )
})
0 commit comments