Skip to content

Commit 801be63

Browse files
authored
Remove mutability from parser/tokenizer APIs. (#548)
* Convert tokenizers and tree builders to use interior mutability. * Add methods for replacing the contents of a BufferQueue. * Work around refcell limitations in example code by leaking memory. * Formatting. * Update MSRV to 1.65. * Remove a manual PartialEq implementation. * Suppress existing warnings. * Fix new clippy warning.
1 parent 8bf8177 commit 801be63

33 files changed

+1149
-1011
lines changed

.github/workflows/main.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -47,7 +47,7 @@ jobs:
4747
- name: Install stable toolchain
4848
run: |
4949
rustup set profile minimal
50-
rustup override set 1.61.0
50+
rustup override set 1.65.0
5151
5252
- run: cargo check --lib --all-features
5353

html5ever/benches/html5ever.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@ struct Sink;
1515
impl TokenSink for Sink {
1616
type Handle = ();
1717

18-
fn process_token(&mut self, token: Token, _line_number: u64) -> TokenSinkResult<()> {
18+
fn process_token(&self, token: Token, _line_number: u64) -> TokenSinkResult<()> {
1919
// Don't use the token, but make sure we don't get
2020
// optimized out entirely.
2121
black_box(token);
@@ -53,7 +53,7 @@ fn run_bench(c: &mut Criterion, name: &str) {
5353

5454
c.bench_function(&test_name, move |b| {
5555
b.iter(|| {
56-
let mut tok = Tokenizer::new(Sink, Default::default());
56+
let tok = Tokenizer::new(Sink, Default::default());
5757
let buffer = BufferQueue::default();
5858
// We are doing clone inside the bench function, this is not ideal, but possibly
5959
// necessary since our iterator consumes the underlying buffer.

html5ever/examples/arena.rs

Lines changed: 17 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@ fn html5ever_parse_slice_into_arena<'a>(bytes: &[u8], arena: Arena<'a>) -> Ref<'
2424
let sink = Sink {
2525
arena,
2626
document: arena.alloc(Node::new(NodeData::Document)),
27-
quirks_mode: QuirksMode::NoQuirks,
27+
quirks_mode: Cell::new(QuirksMode::NoQuirks),
2828
};
2929

3030
parse_document(sink, Default::default())
@@ -41,7 +41,7 @@ type Link<'arena> = Cell<Option<Ref<'arena>>>;
4141
struct Sink<'arena> {
4242
arena: Arena<'arena>,
4343
document: Ref<'arena>,
44-
quirks_mode: QuirksMode,
44+
quirks_mode: Cell<QuirksMode>,
4545
}
4646

4747
/// DOM node which contains links to other nodes in the tree.
@@ -188,14 +188,14 @@ impl<'arena> TreeSink for Sink<'arena> {
188188
self.document
189189
}
190190

191-
fn parse_error(&mut self, _: Cow<'static, str>) {}
191+
fn parse_error(&self, _: Cow<'static, str>) {}
192192

193-
fn get_document(&mut self) -> Ref<'arena> {
193+
fn get_document(&self) -> Ref<'arena> {
194194
self.document
195195
}
196196

197-
fn set_quirks_mode(&mut self, mode: QuirksMode) {
198-
self.quirks_mode = mode;
197+
fn set_quirks_mode(&self, mode: QuirksMode) {
198+
self.quirks_mode.set(mode);
199199
}
200200

201201
fn same_node(&self, x: &Ref<'arena>, y: &Ref<'arena>) -> bool {
@@ -209,7 +209,7 @@ impl<'arena> TreeSink for Sink<'arena> {
209209
}
210210
}
211211

212-
fn get_template_contents(&mut self, target: &Ref<'arena>) -> Ref<'arena> {
212+
fn get_template_contents(&self, target: &Ref<'arena>) -> Ref<'arena> {
213213
if let NodeData::Element {
214214
template_contents: Some(contents),
215215
..
@@ -234,7 +234,7 @@ impl<'arena> TreeSink for Sink<'arena> {
234234
}
235235

236236
fn create_element(
237-
&mut self,
237+
&self,
238238
name: QualName,
239239
attrs: Vec<Attribute>,
240240
flags: ElementFlags,
@@ -251,26 +251,26 @@ impl<'arena> TreeSink for Sink<'arena> {
251251
})
252252
}
253253

254-
fn create_comment(&mut self, text: StrTendril) -> Ref<'arena> {
254+
fn create_comment(&self, text: StrTendril) -> Ref<'arena> {
255255
self.new_node(NodeData::Comment { contents: text })
256256
}
257257

258-
fn create_pi(&mut self, target: StrTendril, data: StrTendril) -> Ref<'arena> {
258+
fn create_pi(&self, target: StrTendril, data: StrTendril) -> Ref<'arena> {
259259
self.new_node(NodeData::ProcessingInstruction {
260260
target,
261261
contents: data,
262262
})
263263
}
264264

265-
fn append(&mut self, parent: &Ref<'arena>, child: NodeOrText<Ref<'arena>>) {
265+
fn append(&self, parent: &Ref<'arena>, child: NodeOrText<Ref<'arena>>) {
266266
self.append_common(
267267
child,
268268
|| parent.last_child.get(),
269269
|new_node| parent.append(new_node),
270270
)
271271
}
272272

273-
fn append_before_sibling(&mut self, sibling: &Ref<'arena>, child: NodeOrText<Ref<'arena>>) {
273+
fn append_before_sibling(&self, sibling: &Ref<'arena>, child: NodeOrText<Ref<'arena>>) {
274274
self.append_common(
275275
child,
276276
|| sibling.previous_sibling.get(),
@@ -279,7 +279,7 @@ impl<'arena> TreeSink for Sink<'arena> {
279279
}
280280

281281
fn append_based_on_parent_node(
282-
&mut self,
282+
&self,
283283
element: &Ref<'arena>,
284284
prev_element: &Ref<'arena>,
285285
child: NodeOrText<Ref<'arena>>,
@@ -292,7 +292,7 @@ impl<'arena> TreeSink for Sink<'arena> {
292292
}
293293

294294
fn append_doctype_to_document(
295-
&mut self,
295+
&self,
296296
name: StrTendril,
297297
public_id: StrTendril,
298298
system_id: StrTendril,
@@ -304,7 +304,7 @@ impl<'arena> TreeSink for Sink<'arena> {
304304
}))
305305
}
306306

307-
fn add_attrs_if_missing(&mut self, target: &Ref<'arena>, attrs: Vec<Attribute>) {
307+
fn add_attrs_if_missing(&self, target: &Ref<'arena>, attrs: Vec<Attribute>) {
308308
let mut existing = if let NodeData::Element { ref attrs, .. } = target.data {
309309
attrs.borrow_mut()
310310
} else {
@@ -322,11 +322,11 @@ impl<'arena> TreeSink for Sink<'arena> {
322322
);
323323
}
324324

325-
fn remove_from_parent(&mut self, target: &Ref<'arena>) {
325+
fn remove_from_parent(&self, target: &Ref<'arena>) {
326326
target.detach()
327327
}
328328

329-
fn reparent_children(&mut self, node: &Ref<'arena>, new_parent: &Ref<'arena>) {
329+
fn reparent_children(&self, node: &Ref<'arena>, new_parent: &Ref<'arena>) {
330330
let mut next_child = node.first_child.get();
331331
while let Some(child) = next_child {
332332
debug_assert!(ptr::eq::<Node>(child.parent.get().unwrap(), *node));

html5ever/examples/noop-tokenize.rs

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -11,20 +11,21 @@
1111

1212
extern crate html5ever;
1313

14+
use std::cell::RefCell;
1415
use std::io;
1516

1617
use html5ever::tendril::*;
1718
use html5ever::tokenizer::{BufferQueue, Token, TokenSink, TokenSinkResult, Tokenizer};
1819

1920
/// In our case, our sink only contains a tokens vector
20-
struct Sink(Vec<Token>);
21+
struct Sink(RefCell<Vec<Token>>);
2122

2223
impl TokenSink for Sink {
2324
type Handle = ();
2425

2526
/// Each processed token will be handled by this method
26-
fn process_token(&mut self, token: Token, _line_number: u64) -> TokenSinkResult<()> {
27-
self.0.push(token);
27+
fn process_token(&self, token: Token, _line_number: u64) -> TokenSinkResult<()> {
28+
self.0.borrow_mut().push(token);
2829
TokenSinkResult::Continue
2930
}
3031
}
@@ -39,7 +40,7 @@ fn main() {
3940
let input = BufferQueue::default();
4041
input.push_back(chunk.try_reinterpret().unwrap());
4142

42-
let mut tok = Tokenizer::new(Sink(Vec::new()), Default::default());
43+
let tok = Tokenizer::new(Sink(RefCell::new(Vec::new())), Default::default());
4344
let _ = tok.feed(&input);
4445
assert!(input.is_empty());
4546
tok.end();

html5ever/examples/noop-tree-builder.rs

Lines changed: 38 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111
extern crate html5ever;
1212

1313
use std::borrow::Cow;
14+
use std::cell::{Cell, RefCell};
1415
use std::collections::HashMap;
1516
use std::io;
1617

@@ -20,14 +21,14 @@ use html5ever::tree_builder::{ElementFlags, NodeOrText, QuirksMode, TreeSink};
2021
use html5ever::{Attribute, ExpandedName, QualName};
2122

2223
struct Sink {
23-
next_id: usize,
24-
names: HashMap<usize, QualName>,
24+
next_id: Cell<usize>,
25+
names: RefCell<HashMap<usize, &'static QualName>>,
2526
}
2627

2728
impl Sink {
28-
fn get_id(&mut self) -> usize {
29-
let id = self.next_id;
30-
self.next_id += 2;
29+
fn get_id(&self) -> usize {
30+
let id = self.next_id.get();
31+
self.next_id.set(id + 2);
3132
id
3233
}
3334
}
@@ -43,12 +44,13 @@ impl TreeSink for Sink {
4344
self
4445
}
4546

46-
fn get_document(&mut self) -> usize {
47+
fn get_document(&self) -> usize {
4748
0
4849
}
4950

50-
fn get_template_contents(&mut self, target: &usize) -> usize {
51-
if let Some(expanded_name!(html "template")) = self.names.get(target).map(|n| n.expanded())
51+
fn get_template_contents(&self, target: &usize) -> usize {
52+
if let Some(expanded_name!(html "template")) =
53+
self.names.borrow().get(target).map(|n| n.expanded())
5254
{
5355
target + 1
5456
} else {
@@ -61,53 +63,63 @@ impl TreeSink for Sink {
6163
}
6264

6365
fn elem_name(&self, target: &usize) -> ExpandedName {
64-
self.names.get(target).expect("not an element").expanded()
66+
self.names
67+
.borrow()
68+
.get(target)
69+
.expect("not an element")
70+
.expanded()
6571
}
6672

67-
fn create_element(&mut self, name: QualName, _: Vec<Attribute>, _: ElementFlags) -> usize {
73+
fn create_element(&self, name: QualName, _: Vec<Attribute>, _: ElementFlags) -> usize {
6874
let id = self.get_id();
69-
self.names.insert(id, name);
75+
// N.B. We intentionally leak memory here to minimize the implementation complexity
76+
// of this example code. A real implementation would either want to use a real
77+
// DOM tree implementation, or else use an arena as the backing store for
78+
// memory used by the parser.
79+
self.names
80+
.borrow_mut()
81+
.insert(id, Box::leak(Box::new(name)));
7082
id
7183
}
7284

73-
fn create_comment(&mut self, _text: StrTendril) -> usize {
85+
fn create_comment(&self, _text: StrTendril) -> usize {
7486
self.get_id()
7587
}
7688

7789
#[allow(unused_variables)]
78-
fn create_pi(&mut self, target: StrTendril, value: StrTendril) -> usize {
90+
fn create_pi(&self, target: StrTendril, value: StrTendril) -> usize {
7991
unimplemented!()
8092
}
8193

82-
fn append_before_sibling(&mut self, _sibling: &usize, _new_node: NodeOrText<usize>) {}
94+
fn append_before_sibling(&self, _sibling: &usize, _new_node: NodeOrText<usize>) {}
8395

8496
fn append_based_on_parent_node(
85-
&mut self,
97+
&self,
8698
_element: &usize,
8799
_prev_element: &usize,
88100
_new_node: NodeOrText<usize>,
89101
) {
90102
}
91103

92-
fn parse_error(&mut self, _msg: Cow<'static, str>) {}
93-
fn set_quirks_mode(&mut self, _mode: QuirksMode) {}
94-
fn append(&mut self, _parent: &usize, _child: NodeOrText<usize>) {}
104+
fn parse_error(&self, _msg: Cow<'static, str>) {}
105+
fn set_quirks_mode(&self, _mode: QuirksMode) {}
106+
fn append(&self, _parent: &usize, _child: NodeOrText<usize>) {}
95107

96-
fn append_doctype_to_document(&mut self, _: StrTendril, _: StrTendril, _: StrTendril) {}
97-
fn add_attrs_if_missing(&mut self, target: &usize, _attrs: Vec<Attribute>) {
98-
assert!(self.names.contains_key(target), "not an element");
108+
fn append_doctype_to_document(&self, _: StrTendril, _: StrTendril, _: StrTendril) {}
109+
fn add_attrs_if_missing(&self, target: &usize, _attrs: Vec<Attribute>) {
110+
assert!(self.names.borrow().contains_key(target), "not an element");
99111
}
100-
fn remove_from_parent(&mut self, _target: &usize) {}
101-
fn reparent_children(&mut self, _node: &usize, _new_parent: &usize) {}
102-
fn mark_script_already_started(&mut self, _node: &usize) {}
112+
fn remove_from_parent(&self, _target: &usize) {}
113+
fn reparent_children(&self, _node: &usize, _new_parent: &usize) {}
114+
fn mark_script_already_started(&self, _node: &usize) {}
103115
}
104116

105117
/// In this example we implement the TreeSink trait, which takes each parsed element and inserts
106118
/// it into a hashmap, while each element is given a numeric id.
107119
fn main() {
108120
let sink = Sink {
109-
next_id: 1,
110-
names: HashMap::new(),
121+
next_id: Cell::new(1),
122+
names: RefCell::new(HashMap::new()),
111123
};
112124

113125
// Read HTML from the standard input and parse it

0 commit comments

Comments
 (0)