Skip to content

Commit 2ede12e

Browse files
authored
Merge pull request #214 from rust-scraper/bump-selectors
Bump selectors, cssparser and html5ever
2 parents e0d4ea7 + fddd90e commit 2ede12e

File tree

8 files changed

+160
-120
lines changed

8 files changed

+160
-120
lines changed

Cargo.lock

Lines changed: 17 additions & 35 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

scraper/Cargo.toml

Lines changed: 6 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -13,13 +13,14 @@ repository = "https://github.com/causal-agent/scraper"
1313
readme = "README.md"
1414

1515
[dependencies]
16-
cssparser = "0.31.0"
16+
ahash = "0.8.0"
17+
cssparser = "0.34.0"
1718
ego-tree = "0.9.0"
18-
html5ever = "0.27"
19-
selectors = "0.25.0"
20-
tendril = "0.4.3"
21-
ahash = "0.8"
19+
html5ever = "0.29.0"
2220
indexmap = { version = "2.6.0", optional = true }
21+
precomputed-hash = "0.1.1"
22+
selectors = "0.26.0"
23+
tendril = "0.4.3"
2324

2425
[dependencies.getopts]
2526
version = "0.2.21"

scraper/src/element_ref/element.rs

Lines changed: 10 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,7 @@
11
use html5ever::Namespace;
22
use selectors::{
33
attr::{AttrSelectorOperation, CaseSensitivity, NamespaceConstraint},
4+
bloom::BloomFilter,
45
matching, Element, OpaqueElement,
56
};
67

@@ -122,6 +123,10 @@ impl<'a> Element for ElementRef<'a> {
122123
self.value().has_class(&name.0, case_sensitivity)
123124
}
124125

126+
fn has_custom_state(&self, _name: &CssLocalName) -> bool {
127+
false
128+
}
129+
125130
fn is_empty(&self) -> bool {
126131
!self
127132
.children()
@@ -134,6 +139,11 @@ impl<'a> Element for ElementRef<'a> {
134139
}
135140

136141
fn apply_selector_flags(&self, _flags: matching::ElementSelectorFlags) {}
142+
143+
fn add_element_unique_hashes(&self, _filter: &mut BloomFilter) -> bool {
144+
// FIXME: Do we want to add `self.node.id()` here?
145+
false
146+
}
137147
}
138148

139149
#[cfg(test)]

scraper/src/element_ref/mod.rs

Lines changed: 6 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -7,7 +7,7 @@ use std::ops::Deref;
77
use ego_tree::iter::{Edge, Traverse};
88
use ego_tree::NodeRef;
99
use html5ever::serialize::{serialize, SerializeOpts, TraversalScope};
10-
use selectors::NthIndexCache;
10+
use selectors::matching::SelectorCaches;
1111

1212
use crate::node::Element;
1313
use crate::{Node, Selector};
@@ -49,7 +49,7 @@ impl<'a> ElementRef<'a> {
4949
scope: *self,
5050
inner,
5151
selector,
52-
nth_index_cache: NthIndexCache::default(),
52+
caches: Default::default(),
5353
}
5454
}
5555

@@ -135,7 +135,7 @@ pub struct Select<'a, 'b> {
135135
scope: ElementRef<'a>,
136136
inner: Traverse<'a, Node>,
137137
selector: &'b Selector,
138-
nth_index_cache: NthIndexCache,
138+
caches: SelectorCaches,
139139
}
140140

141141
impl Debug for Select<'_, '_> {
@@ -144,7 +144,7 @@ impl Debug for Select<'_, '_> {
144144
.field("scope", &self.scope)
145145
.field("inner", &self.inner)
146146
.field("selector", &self.selector)
147-
.field("nth_index_cache", &"..")
147+
.field("caches", &"..")
148148
.finish()
149149
}
150150
}
@@ -155,7 +155,7 @@ impl Clone for Select<'_, '_> {
155155
scope: self.scope,
156156
inner: self.inner.clone(),
157157
selector: self.selector,
158-
nth_index_cache: NthIndexCache::default(),
158+
caches: Default::default(),
159159
}
160160
}
161161
}
@@ -170,7 +170,7 @@ impl<'a, 'b> Iterator for Select<'a, 'b> {
170170
if self.selector.matches_with_scope_and_cache(
171171
&element,
172172
Some(self.scope),
173-
&mut self.nth_index_cache,
173+
&mut self.caches,
174174
) {
175175
return Some(element);
176176
}

scraper/src/html/mod.rs

Lines changed: 18 additions & 19 deletions
Original file line number | Diff line number | Diff line change
@@ -10,12 +10,14 @@ use ego_tree::Tree;
1010
use html5ever::serialize::SerializeOpts;
1111
use html5ever::tree_builder::QuirksMode;
1212
use html5ever::{driver, serialize, QualName};
13-
use selectors::NthIndexCache;
13+
use selectors::matching::SelectorCaches;
1414
use tendril::TendrilSink;
1515

1616
use crate::selector::Selector;
1717
use crate::{ElementRef, Node};
1818

19+
pub use tree_sink::HtmlTreeSink;
20+
1921
/// An HTML tree.
2022
///
2123
/// Parsing does not fail hard. Instead, the `quirks_mode` is set and errors are added to the
@@ -67,22 +69,23 @@ impl Html {
6769
/// # fn main() {
6870
/// # let document = "";
6971
/// use html5ever::driver::{self, ParseOpts};
70-
/// use scraper::Html;
72+
/// use scraper::{Html, HtmlTreeSink};
7173
/// use tendril::TendrilSink;
7274
///
73-
/// let parser = driver::parse_document(Html::new_document(), ParseOpts::default());
75+
/// let parser = driver::parse_document(HtmlTreeSink::new(Html::new_document()), ParseOpts::default());
7476
/// let html = parser.one(document);
7577
/// # }
7678
/// ```
7779
pub fn parse_document(document: &str) -> Self {
78-
let parser = driver::parse_document(Self::new_document(), Default::default());
80+
let parser =
81+
driver::parse_document(HtmlTreeSink::new(Self::new_document()), Default::default());
7982
parser.one(document)
8083
}
8184

8285
/// Parses a string of HTML as a fragment.
8386
pub fn parse_fragment(fragment: &str) -> Self {
8487
let parser = driver::parse_fragment(
85-
Self::new_fragment(),
88+
HtmlTreeSink::new(Self::new_fragment()),
8689
Default::default(),
8790
QualName::new(None, ns!(html), local_name!("body")),
8891
Vec::new(),
@@ -95,7 +98,7 @@ impl Html {
9598
Select {
9699
inner: self.tree.nodes(),
97100
selector,
98-
nth_index_cache: NthIndexCache::default(),
101+
caches: Default::default(),
99102
}
100103
}
101104

@@ -127,15 +130,15 @@ impl Html {
127130
pub struct Select<'a, 'b> {
128131
inner: Nodes<'a, Node>,
129132
selector: &'b Selector,
130-
nth_index_cache: NthIndexCache,
133+
caches: SelectorCaches,
131134
}
132135

133136
impl fmt::Debug for Select<'_, '_> {
134137
fn fmt(&self, fmt: &mut fmt::Formatter<'_>) -> fmt::Result {
135138
fmt.debug_struct("Select")
136139
.field("inner", &self.inner)
137140
.field("selector", &self.selector)
138-
.field("nth_index_cache", &"..")
141+
.field("caches", &"..")
139142
.finish()
140143
}
141144
}
@@ -145,7 +148,7 @@ impl Clone for Select<'_, '_> {
145148
Self {
146149
inner: self.inner.clone(),
147150
selector: self.selector,
148-
nth_index_cache: NthIndexCache::default(),
151+
caches: Default::default(),
149152
}
150153
}
151154
}
@@ -157,11 +160,9 @@ impl<'a, 'b> Iterator for Select<'a, 'b> {
157160
for node in self.inner.by_ref() {
158161
if let Some(element) = ElementRef::wrap(node) {
159162
if element.parent().is_some()
160-
&& self.selector.matches_with_scope_and_cache(
161-
&element,
162-
None,
163-
&mut self.nth_index_cache,
164-
)
163+
&& self
164+
.selector
165+
.matches_with_scope_and_cache(&element, None, &mut self.caches)
165166
{
166167
return Some(element);
167168
}
@@ -182,11 +183,9 @@ impl<'a, 'b> DoubleEndedIterator for Select<'a, 'b> {
182183
for node in self.inner.by_ref().rev() {
183184
if let Some(element) = ElementRef::wrap(node) {
184185
if element.parent().is_some()
185-
&& self.selector.matches_with_scope_and_cache(
186-
&element,
187-
None,
188-
&mut self.nth_index_cache,
189-
)
186+
&& self
187+
.selector
188+
.matches_with_scope_and_cache(&element, None, &mut self.caches)
190189
{
191190
return Some(element);
192191
}

0 commit comments

Comments
 (0)