Commit 8bf8177

Convert BufferQueue to use Interior Mutability (#542)
* Convert BufferQueue to use Interior Mutability
* remove mut
* Remove &mut BufferQueue usage
* Fix format
* Fix clippy warnings

Signed-off-by: Taym <[email protected]>
1 parent fd246b3 commit 8bf8177
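The diff below touches only signatures and call sites; BufferQueue itself lives in the markup5ever crate, whose half of the change is not shown in this view. The pattern being applied is interior mutability: the queue's storage moves behind a RefCell, so mutation happens through &self inside each method instead of requiring &mut at every caller. A minimal sketch of the idea — toy types only, with String standing in for StrTendril, not markup5ever's actual implementation:

use std::cell::RefCell;
use std::collections::VecDeque;

// Toy stand-in for markup5ever's BufferQueue. The RefCell moves the
// mutable borrow from the caller's signature into each method body.
#[derive(Default)]
pub struct BufferQueue {
    buffers: RefCell<VecDeque<String>>,
}

impl BufferQueue {
    // Note &self, not &mut self: callers can hold plain shared references.
    pub fn push_back(&self, buf: String) {
        self.buffers.borrow_mut().push_back(buf);
    }

    // Un-consume: put a buffer back at the front of the queue.
    pub fn push_front(&self, buf: String) {
        self.buffers.borrow_mut().push_front(buf);
    }

    // Pop the next character, discarding buffers as they drain.
    // (Re-slicing a String per char is wasteful; a real queue tracks an
    // offset, but the borrow discipline is the point here.)
    pub fn next(&self) -> Option<char> {
        let mut buffers = self.buffers.borrow_mut();
        loop {
            let front = buffers.front_mut()?;
            match front.chars().next() {
                Some(c) => {
                    let rest = front.split_off(c.len_utf8());
                    *front = rest;
                    if front.is_empty() {
                        buffers.pop_front();
                    }
                    return Some(c);
                },
                None => {
                    buffers.pop_front();
                },
            }
        }
    }

    pub fn is_empty(&self) -> bool {
        self.buffers.borrow().is_empty()
    }
}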

File tree

16 files changed: +114 −116 lines


html5ever/benches/html5ever.rs

Lines changed: 3 additions & 3 deletions

@@ -54,14 +54,14 @@ fn run_bench(c: &mut Criterion, name: &str) {
     c.bench_function(&test_name, move |b| {
         b.iter(|| {
             let mut tok = Tokenizer::new(Sink, Default::default());
-            let mut buffer = BufferQueue::default();
+            let buffer = BufferQueue::default();
             // We are doing clone inside the bench function, this is not ideal, but possibly
             // necessary since our iterator consumes the underlying buffer.
             for buf in input.clone().into_iter() {
                 buffer.push_back(buf);
-                let _ = tok.feed(&mut buffer);
+                let _ = tok.feed(&buffer);
             }
-            let _ = tok.feed(&mut buffer);
+            let _ = tok.feed(&buffer);
             tok.end();
         })
     });
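Note what changed in the binding itself: `buffer` is no longer declared `mut`, yet `push_back` and `feed` still grow and drain it. That is interior mutability in a sentence — the mutation is real, but it no longer appears in the caller's types.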

html5ever/examples/noop-tokenize.rs

Lines changed: 2 additions & 2 deletions

@@ -36,11 +36,11 @@ fn main() {
     let mut chunk = ByteTendril::new();
     io::stdin().read_to_tendril(&mut chunk).unwrap();

-    let mut input = BufferQueue::default();
+    let input = BufferQueue::default();
     input.push_back(chunk.try_reinterpret().unwrap());

     let mut tok = Tokenizer::new(Sink(Vec::new()), Default::default());
-    let _ = tok.feed(&mut input);
+    let _ = tok.feed(&input);
     assert!(input.is_empty());
     tok.end();
 }

html5ever/examples/tokenize.rs

Lines changed: 2 additions & 2 deletions

@@ -90,7 +90,7 @@ fn main() {
     let mut chunk = ByteTendril::new();
     io::stdin().read_to_tendril(&mut chunk).unwrap();

-    let mut input = BufferQueue::default();
+    let input = BufferQueue::default();
     input.push_back(chunk.try_reinterpret().unwrap());

     let mut tok = Tokenizer::new(
@@ -100,7 +100,7 @@
             ..Default::default()
         },
     );
-    let _ = tok.feed(&mut input);
+    let _ = tok.feed(&input);

     assert!(input.is_empty());
     tok.end();

html5ever/src/driver.rs

Lines changed: 2 additions & 2 deletions

@@ -106,7 +106,7 @@ impl<Sink: TreeSink> TendrilSink<tendril::fmt::UTF8> for Parser<Sink> {
     fn process(&mut self, t: StrTendril) {
         self.input_buffer.push_back(t);
         // FIXME: Properly support </script> somehow.
-        while let TokenizerResult::Script(_) = self.tokenizer.feed(&mut self.input_buffer) {}
+        while let TokenizerResult::Script(_) = self.tokenizer.feed(&self.input_buffer) {}
     }

     // FIXME: Is it too noisy to report every character decoding error?
@@ -118,7 +118,7 @@

     fn finish(mut self) -> Self::Output {
         // FIXME: Properly support </script> somehow.
-        while let TokenizerResult::Script(_) = self.tokenizer.feed(&mut self.input_buffer) {}
+        while let TokenizerResult::Script(_) = self.tokenizer.feed(&self.input_buffer) {}
         assert!(self.input_buffer.is_empty());
         self.tokenizer.end();
         self.tokenizer.sink.sink.finish()
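Nothing in this file needed the change for borrow-checker reasons (`self.tokenizer` and `self.input_buffer` are disjoint fields), so the payoff is elsewhere: a queue fed through `&BufferQueue` can now be shared. Presumably this is aimed at cases like document.write, where script executed mid-parse must append to the very stream the tokenizer is draining. A speculative sketch of that kind of sharing, reusing the toy queue from the sketch above:

use std::rc::Rc;

fn main() {
    // Two handles to one queue: a "parser" side and a "script" side.
    let parser_input = Rc::new(BufferQueue::default());
    let script_output = Rc::clone(&parser_input);

    parser_input.push_back("<p>hello".to_string());
    // The second handle can inject input mid-parse; neither handle
    // ever needs (or could obtain) an exclusive &mut borrow.
    script_output.push_back(" world</p>".to_string());

    let mut drained = String::new();
    while let Some(c) = parser_input.next() {
        drained.push(c);
    }
    assert_eq!(drained, "<p>hello world</p>");
}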

html5ever/src/tokenizer/char_ref/mod.rs

Lines changed: 11 additions & 11 deletions

@@ -115,7 +115,7 @@ impl CharRefTokenizer {
     pub(super) fn step<Sink: TokenSink>(
         &mut self,
         tokenizer: &mut Tokenizer<Sink>,
-        input: &mut BufferQueue,
+        input: &BufferQueue,
     ) -> Status {
         if self.result.is_some() {
             return Done;
@@ -135,7 +135,7 @@
     fn do_begin<Sink: TokenSink>(
         &mut self,
         tokenizer: &mut Tokenizer<Sink>,
-        input: &mut BufferQueue,
+        input: &BufferQueue,
     ) -> Status {
         match unwrap_or_return!(tokenizer.peek(input), Stuck) {
             'a'..='z' | 'A'..='Z' | '0'..='9' => {
@@ -156,7 +156,7 @@
     fn do_octothorpe<Sink: TokenSink>(
         &mut self,
         tokenizer: &mut Tokenizer<Sink>,
-        input: &mut BufferQueue,
+        input: &BufferQueue,
     ) -> Status {
         let c = unwrap_or_return!(tokenizer.peek(input), Stuck);
         match c {
@@ -177,7 +177,7 @@
     fn do_numeric<Sink: TokenSink>(
         &mut self,
         tokenizer: &mut Tokenizer<Sink>,
-        input: &mut BufferQueue,
+        input: &BufferQueue,
         base: u32,
     ) -> Status {
         let c = unwrap_or_return!(tokenizer.peek(input), Stuck);
@@ -207,7 +207,7 @@
     fn do_numeric_semicolon<Sink: TokenSink>(
         &mut self,
         tokenizer: &mut Tokenizer<Sink>,
-        input: &mut BufferQueue,
+        input: &BufferQueue,
     ) -> Status {
         match unwrap_or_return!(tokenizer.peek(input), Stuck) {
             ';' => tokenizer.discard_char(input),
@@ -221,7 +221,7 @@
     fn unconsume_numeric<Sink: TokenSink>(
         &mut self,
         tokenizer: &mut Tokenizer<Sink>,
-        input: &mut BufferQueue,
+        input: &BufferQueue,
     ) -> Status {
         let mut unconsume = StrTendril::from_char('#');
         if let Some(c) = self.hex_marker {
@@ -270,7 +270,7 @@
     fn do_named<Sink: TokenSink>(
         &mut self,
         tokenizer: &mut Tokenizer<Sink>,
-        input: &mut BufferQueue,
+        input: &BufferQueue,
     ) -> Status {
         // peek + discard skips over newline normalization, therefore making it easier to
         // un-consume
@@ -304,14 +304,14 @@
         tokenizer.emit_error(msg);
     }

-    fn unconsume_name(&mut self, input: &mut BufferQueue) {
+    fn unconsume_name(&mut self, input: &BufferQueue) {
         input.push_front(self.name_buf_opt.take().unwrap());
     }

     fn finish_named<Sink: TokenSink>(
         &mut self,
         tokenizer: &mut Tokenizer<Sink>,
-        input: &mut BufferQueue,
+        input: &BufferQueue,
         end_char: Option<char>,
     ) -> Status {
         match self.name_match {
@@ -395,7 +395,7 @@
     fn do_bogus_name<Sink: TokenSink>(
         &mut self,
         tokenizer: &mut Tokenizer<Sink>,
-        input: &mut BufferQueue,
+        input: &BufferQueue,
     ) -> Status {
         // peek + discard skips over newline normalization, therefore making it easier to
        // un-consume
@@ -414,7 +414,7 @@
     pub(super) fn end_of_file<Sink: TokenSink>(
         &mut self,
         tokenizer: &mut Tokenizer<Sink>,
-        input: &mut BufferQueue,
+        input: &BufferQueue,
     ) {
         while self.result.is_none() {
             match self.state {
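All eleven edits in this file are the same mechanical signature change, rippling `&BufferQueue` through the char-ref sub-tokenizer's helpers. The only helper that does more than read is `unconsume_name`, which pushes the partially matched name back onto the front of the queue — a mutation that now also goes through a shared reference. In terms of the toy queue sketched above (the function name here is illustrative, not html5ever's):

fn unconsume_name_sketch(input: &BufferQueue, name_buf: String) {
    // Mutation through &BufferQueue: the RefCell inside the queue hands
    // out the mutable access that the signature no longer carries.
    input.push_front(name_buf);
}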

html5ever/src/tokenizer/mod.rs

Lines changed: 17 additions & 20 deletions

@@ -206,7 +206,7 @@ impl<Sink: TokenSink> Tokenizer<Sink> {
     }

     /// Feed an input string into the tokenizer.
-    pub fn feed(&mut self, input: &mut BufferQueue) -> TokenizerResult<Sink::Handle> {
+    pub fn feed(&mut self, input: &BufferQueue) -> TokenizerResult<Sink::Handle> {
         if input.is_empty() {
             return TokenizerResult::Done;
         }
@@ -248,7 +248,7 @@
     //§ preprocessing-the-input-stream
     // Get the next input character, which might be the character
     // 'c' that we already consumed from the buffers.
-    fn get_preprocessed_char(&mut self, mut c: char, input: &mut BufferQueue) -> Option<char> {
+    fn get_preprocessed_char(&mut self, mut c: char, input: &BufferQueue) -> Option<char> {
         if self.ignore_lf {
             self.ignore_lf = false;
             if c == '\n' {
@@ -283,7 +283,7 @@

     //§ tokenization
     // Get the next input character, if one is available.
-    fn get_char(&mut self, input: &mut BufferQueue) -> Option<char> {
+    fn get_char(&mut self, input: &BufferQueue) -> Option<char> {
         if self.reconsume {
             self.reconsume = false;
             Some(self.current_char)
@@ -294,7 +294,7 @@
         }
     }

-    fn pop_except_from(&mut self, input: &mut BufferQueue, set: SmallCharSet) -> Option<SetResult> {
+    fn pop_except_from(&mut self, input: &BufferQueue, set: SmallCharSet) -> Option<SetResult> {
         // Bail to the slow path for various corner cases.
         // This means that `FromSet` can contain characters not in the set!
         // It shouldn't matter because the fallback `FromSet` case should
@@ -319,12 +319,7 @@
     // BufferQueue::eat.
     //
     // NB: this doesn't set the current input character.
-    fn eat(
-        &mut self,
-        input: &mut BufferQueue,
-        pat: &str,
-        eq: fn(&u8, &u8) -> bool,
-    ) -> Option<bool> {
+    fn eat(&mut self, input: &BufferQueue, pat: &str, eq: fn(&u8, &u8) -> bool) -> Option<bool> {
         if self.ignore_lf {
             self.ignore_lf = false;
             if self.peek(input) == Some('\n') {
@@ -336,15 +331,17 @@
         match input.eat(pat, eq) {
             None if self.at_eof => Some(false),
             None => {
-                self.temp_buf.extend(input);
+                while let Some(data) = input.next() {
+                    self.temp_buf.push_char(data);
+                }
                 None
             },
             Some(matched) => Some(matched),
         }
     }

     /// Run the state machine for as long as we can.
-    fn run(&mut self, input: &mut BufferQueue) -> TokenizerResult<Sink::Handle> {
+    fn run(&mut self, input: &BufferQueue) -> TokenizerResult<Sink::Handle> {
         if self.opts.profile {
             loop {
                 let state = self.state;
@@ -567,7 +564,7 @@
         }
     }

-    fn discard_char(&mut self, input: &mut BufferQueue) {
+    fn discard_char(&mut self, input: &BufferQueue) {
         // peek() deals in un-processed characters (no newline normalization), while get_char()
         // does.
         //
@@ -696,7 +693,7 @@
     // Return true if we should be immediately re-invoked
     // (this just simplifies control flow vs. break / continue).
     #[allow(clippy::never_loop)]
-    fn step(&mut self, input: &mut BufferQueue) -> ProcessResult<Sink::Handle> {
+    fn step(&mut self, input: &BufferQueue) -> ProcessResult<Sink::Handle> {
         if self.char_ref_tokenizer.is_some() {
             return self.step_char_ref_tokenizer(input);
         }
@@ -1382,7 +1379,7 @@
         }
     }

-    fn step_char_ref_tokenizer(&mut self, input: &mut BufferQueue) -> ProcessResult<Sink::Handle> {
+    fn step_char_ref_tokenizer(&mut self, input: &BufferQueue) -> ProcessResult<Sink::Handle> {
         // FIXME HACK: Take and replace the tokenizer so we don't
         // double-mut-borrow self. This is why it's boxed.
         let mut tok = self.char_ref_tokenizer.take().unwrap();
@@ -1432,19 +1429,19 @@
     pub fn end(&mut self) {
         // Handle EOF in the char ref sub-tokenizer, if there is one.
         // Do this first because it might un-consume stuff.
-        let mut input = BufferQueue::default();
+        let input = BufferQueue::default();
         match self.char_ref_tokenizer.take() {
             None => (),
             Some(mut tok) => {
-                tok.end_of_file(self, &mut input);
+                tok.end_of_file(self, &input);
                 self.process_char_ref(tok.get_result());
             },
         }

         // Process all remaining buffered input.
         // If we're waiting for lookahead, we're not gonna get it.
         self.at_eof = true;
-        assert!(matches!(self.run(&mut input), TokenizerResult::Done));
+        assert!(matches!(self.run(&input), TokenizerResult::Done));
         assert!(input.is_empty());

         loop {
@@ -1668,10 +1665,10 @@ mod test {
     fn tokenize(input: Vec<StrTendril>, opts: TokenizerOpts) -> Vec<(Token, u64)> {
         let sink = LinesMatch::new();
         let mut tok = Tokenizer::new(sink, opts);
-        let mut buffer = BufferQueue::default();
+        let buffer = BufferQueue::default();
         for chunk in input.into_iter() {
             buffer.push_back(chunk);
-            let _ = tok.feed(&mut buffer);
+            let _ = tok.feed(&buffer);
         }
         tok.end();
         tok.sink.lines
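The one hunk here that is not a pure signature edit is in `eat`. The old fallback, `self.temp_buf.extend(input)`, iterated the queue through the `Iterator` trait, whose `next` takes `&mut self` — unavailable once `input` is a shared reference. The diff therefore calls an inherent `next(&self)` method in an explicit loop instead. With the toy queue sketched above, the equivalent drain looks like:

// Drain whatever the queue still holds into a scratch buffer, mirroring
// the new fallback in eat(). (html5ever's temp_buf is a StrTendril with
// push_char; a plain String stands in here.)
fn drain_into(input: &BufferQueue, temp_buf: &mut String) {
    // BufferQueue::next is an inherent method on &self, not Iterator::next,
    // so no mutable borrow of the queue is needed.
    while let Some(c) = input.next() {
        temp_buf.push(c);
    }
}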
