Skip to content

Commit c784502

Browse files
committed
EBML: Improve ElementReader tree navigation
1 parent 181d842 commit c784502

8 files changed

+131
-102
lines changed

lofty/src/ebml/element_reader.rs

+113-85
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@ use crate::ebml::vint::VInt;
22
use crate::error::Result;
33
use crate::macros::{decode_err, try_vec};
44

5-
use std::io::Read;
5+
use std::io::{self, Read};
66
use std::ops::{Deref, DerefMut};
77

88
use byteorder::{BigEndian, ReadBytesExt};
@@ -241,15 +241,21 @@ ebml_master_elements! {
241241
},
242242
}
243243

244-
#[derive(Debug)]
244+
const MAX_DEPTH: u8 = 16;
245+
const ROOT_DEPTH: u8 = 1;
246+
247+
#[derive(Copy, Clone, Debug)]
248+
struct Depth {
249+
level: u8,
250+
length: VInt,
251+
}
252+
253+
#[derive(Copy, Clone, Debug)]
245254
struct MasterElementContext {
246255
element: MasterElement,
247-
remaining_length: VInt,
256+
depth: Depth,
248257
}
249258

250-
const MAX_DEPTH: u8 = 16;
251-
const ROOT_DEPTH: u8 = 1;
252-
253259
#[derive(Debug)]
254260
struct ElementReaderContext {
255261
depth: u8,
@@ -269,8 +275,7 @@ struct ElementReaderContext {
269275
/// to know which depth to lock the reader at again (if any).
270276
///
271277
/// This will **always** be sorted, so the current lock will be at the end.
272-
lock_depths: Vec<u8>,
273-
lock_len: VInt,
278+
lock_depths: Vec<usize>,
274279
}
275280

276281
impl Default for ElementReaderContext {
@@ -284,11 +289,41 @@ impl Default for ElementReaderContext {
284289
max_size_length: 8,
285290
locked: false,
286291
lock_depths: Vec::with_capacity(MAX_DEPTH as usize),
287-
lock_len: VInt::ZERO,
288292
}
289293
}
290294
}
291295

296+
impl ElementReaderContext {
297+
fn current_master(&self) -> Option<MasterElementContext> {
298+
if self.depth == 0 {
299+
return None;
300+
}
301+
302+
self.masters.get((self.depth - 1) as usize).copied()
303+
}
304+
305+
fn current_master_length(&self) -> VInt {
306+
assert!(self.depth > 0);
307+
self.current_master()
308+
.expect("should have current master element")
309+
.depth
310+
.length
311+
}
312+
313+
fn propagate_length_change(&mut self, length: u64) {
314+
for master in &mut self.masters {
315+
master.depth.length = master.depth.length.saturating_sub(length);
316+
}
317+
}
318+
319+
fn remaining_lock_length(&self) -> VInt {
320+
assert!(self.locked && !self.lock_depths.is_empty());
321+
322+
let lock_depth = *self.lock_depths.last().unwrap();
323+
self.masters[lock_depth - 1].depth.length
324+
}
325+
}
326+
292327
#[derive(Debug)]
293328
pub(crate) enum ElementReaderYield {
294329
Master((ElementIdent, VInt)),
@@ -321,30 +356,37 @@ impl ElementReaderYield {
321356
/// An EBML element reader.
322357
pub struct ElementReader<R> {
323358
reader: R,
324-
ctx: ElementReaderContext,
359+
pub(self) ctx: ElementReaderContext,
325360
}
326361

327362
impl<R> Read for ElementReader<R>
328363
where
329364
R: Read,
330365
{
331-
fn read(&mut self, buf: &mut [u8]) -> std::io::Result<usize> {
366+
fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> {
332367
if self.ctx.locked {
333-
let lock_len = self.ctx.lock_len.value();
368+
let lock_len = self.ctx.remaining_lock_length().value();
334369
if buf.len() > lock_len as usize {
335-
return Err(std::io::Error::new(
336-
std::io::ErrorKind::UnexpectedEof,
370+
return Err(io::Error::new(
371+
io::ErrorKind::UnexpectedEof,
337372
"Cannot read past the end of the current master element",
338373
));
339374
}
340375
}
341376

342377
let ret = self.reader.read(buf)?;
343-
let len = self.current_master_length();
344-
self.set_current_master_length(len.saturating_sub(ret as u64));
378+
if self.ctx.current_master().is_none() {
379+
return Ok(ret);
380+
}
345381

346-
if self.ctx.locked {
347-
self.ctx.lock_len = self.ctx.lock_len.saturating_sub(ret as u64);
382+
self.ctx.propagate_length_change(ret as u64);
383+
384+
let current_master = self
385+
.ctx
386+
.current_master()
387+
.expect("should have current master element");
388+
if current_master.depth.length == 0 {
389+
self.goto_previous_master()?;
348390
}
349391

350392
Ok(ret)
@@ -370,42 +412,17 @@ where
370412
self.ctx.max_size_length = len
371413
}
372414

373-
fn current_master(&self) -> Option<MasterElement> {
374-
if self.ctx.depth == 0 {
375-
assert!(self.ctx.masters.is_empty());
376-
return None;
377-
}
378-
379-
Some(self.ctx.masters[(self.ctx.depth - 1) as usize].element)
380-
}
381-
382-
fn current_master_length(&self) -> VInt {
383-
if self.ctx.depth == 0 {
384-
assert!(self.ctx.masters.is_empty());
385-
return VInt::ZERO;
386-
}
387-
388-
self.ctx.masters[(self.ctx.depth - 1) as usize].remaining_length
389-
}
390-
391-
fn set_current_master_length(&mut self, length: VInt) {
392-
if self.ctx.depth == 0 {
393-
assert!(self.ctx.masters.is_empty());
394-
return;
395-
}
396-
397-
self.ctx.masters[(self.ctx.depth - 1) as usize].remaining_length = length;
398-
}
399-
400415
fn push_new_master(&mut self, master: MasterElement, size: VInt) -> Result<()> {
416+
log::debug!("New master element: {:?}", master.id);
417+
401418
if self.ctx.depth == MAX_DEPTH {
402419
decode_err!(@BAIL Ebml, "Maximum depth reached");
403420
}
404421

405422
// If we are at the root level, we do not increment the depth
406423
// since we are not actually inside a master element.
407424
// For example, we are moving from \EBML to \Segment.
408-
let at_root_level = self.ctx.depth == ROOT_DEPTH && self.current_master_length() == 0;
425+
let at_root_level = self.ctx.depth == ROOT_DEPTH && self.ctx.current_master_length() == 0;
409426
if at_root_level {
410427
assert_eq!(self.ctx.masters.len(), 1);
411428
self.ctx.masters.clear();
@@ -415,20 +432,47 @@ where
415432

416433
self.ctx.masters.push(MasterElementContext {
417434
element: master,
418-
remaining_length: size,
435+
depth: Depth {
436+
level: self.ctx.depth,
437+
length: size,
438+
},
419439
});
420440

421441
Ok(())
422442
}
423443

444+
fn goto_previous_master(&mut self) -> io::Result<()> {
445+
let lock_depth = self
446+
.ctx
447+
.lock_depths
448+
.last()
449+
.copied()
450+
.unwrap_or(ROOT_DEPTH as usize);
451+
if lock_depth == self.ctx.depth as usize || self.ctx.depth == 0 {
452+
return Ok(());
453+
}
454+
455+
if self.ctx.depth == ROOT_DEPTH {
456+
return Err(io::Error::new(
457+
io::ErrorKind::Other,
458+
"Cannot go to previous master element, already at root",
459+
));
460+
}
461+
462+
while self.ctx.current_master_length() == 0
463+
&& (self.ctx.depth as usize != lock_depth && self.ctx.depth != ROOT_DEPTH)
464+
{
465+
self.ctx.depth -= 1;
466+
let _ = self.ctx.masters.pop();
467+
}
468+
469+
Ok(())
470+
}
471+
424472
fn goto_next_master(&mut self) -> Result<ElementReaderYield> {
425473
self.exhaust_current_master()?;
426474

427-
let header = ElementHeader::read(
428-
&mut self.reader,
429-
self.ctx.max_id_length,
430-
self.ctx.max_size_length,
431-
)?;
475+
let header = ElementHeader::read(self, self.ctx.max_id_length, self.ctx.max_size_length)?;
432476
let Some(master) = master_elements().get(&header.id) else {
433477
// We encountered an unknown master element
434478
return Ok(ElementReaderYield::Unknown(header));
@@ -439,35 +483,23 @@ where
439483
Ok(ElementReaderYield::Master((master.id, header.size)))
440484
}
441485

442-
fn goto_previous_master(&mut self) -> Result<()> {
443-
if self.ctx.depth == ROOT_DEPTH || self.ctx.lock_depths.last() == Some(&self.ctx.depth) {
444-
decode_err!(@BAIL Ebml, "Cannot go to previous master element, already at root")
445-
}
446-
447-
self.exhaust_current_master()?;
448-
449-
self.ctx.depth -= 1;
450-
let _ = self.ctx.masters.pop();
451-
452-
Ok(())
453-
}
454-
455486
pub(crate) fn next(&mut self) -> Result<ElementReaderYield> {
456-
let Some(current_master) = self.current_master() else {
487+
let Some(current_master) = self.ctx.current_master() else {
457488
return self.goto_next_master();
458489
};
459490

460-
if self.ctx.locked && self.ctx.lock_len == 0 {
491+
if self.ctx.locked && self.ctx.remaining_lock_length() == 0 {
461492
return Ok(ElementReaderYield::Eof);
462493
}
463494

464-
if self.current_master_length() == 0 {
495+
if current_master.depth.length == 0 {
465496
return self.goto_next_master();
466497
}
467498

468499
let header = ElementHeader::read(self, self.ctx.max_id_length, self.ctx.max_size_length)?;
469500

470501
let Some((_, child)) = current_master
502+
.element
471503
.children
472504
.iter()
473505
.find(|(id, _)| *id == header.id)
@@ -476,12 +508,11 @@ where
476508
};
477509

478510
if child.data_type == ElementDataType::Master {
479-
self.push_new_master(
480-
*master_elements()
481-
.get(&header.id)
482-
.expect("Nested master elements should be defined at this level."),
483-
header.size,
484-
)?;
511+
let master = *master_elements()
512+
.get(&header.id)
513+
.expect("Nested master elements should be defined at this level.");
514+
515+
self.push_new_master(master, header.size)?;
485516

486517
// We encountered a nested master element
487518
return Ok(ElementReaderYield::Master((child.ident, header.size)));
@@ -491,21 +522,19 @@ where
491522
}
492523

493524
pub(crate) fn exhaust_current_master(&mut self) -> Result<()> {
494-
let master_length = self.current_master_length().value();
495-
if master_length == 0 {
525+
let Some(current_master) = self.ctx.current_master() else {
496526
return Ok(());
497-
}
527+
};
498528

499-
self.skip(master_length)?;
529+
self.skip(current_master.depth.length.value())?;
500530
Ok(())
501531
}
502532

503533
pub(crate) fn lock(&mut self) {
504534
log::trace!("New lock at depth: {}", self.ctx.depth);
505535

506536
self.ctx.locked = true;
507-
self.ctx.lock_len = self.current_master_length();
508-
self.ctx.lock_depths.push(self.ctx.depth);
537+
self.ctx.lock_depths.push(self.ctx.depth as usize);
509538
}
510539

511540
pub(crate) fn unlock(&mut self) {
@@ -520,7 +549,6 @@ where
520549
};
521550

522551
log::trace!("Moving lock to depth: {}", last);
523-
self.ctx.lock_len = self.ctx.masters[(*last - 1) as usize].remaining_length;
524552
}
525553

526554
pub(crate) fn children(&mut self) -> ElementChildIterator<'_, R> {
@@ -531,12 +559,12 @@ where
531559
pub(crate) fn skip(&mut self, length: u64) -> Result<()> {
532560
log::trace!("Skipping {} bytes", length);
533561

534-
let current_master_length = self.current_master_length();
562+
let current_master_length = self.ctx.current_master_length();
535563
if length > current_master_length.value() {
536564
decode_err!(@BAIL Ebml, "Cannot skip past the end of the current master element")
537565
}
538566

539-
std::io::copy(&mut self.by_ref().take(length), &mut std::io::sink())?;
567+
std::io::copy(&mut self.by_ref().take(length), &mut io::sink())?;
540568
Ok(())
541569
}
542570

@@ -688,9 +716,9 @@ where
688716
.lock_depths
689717
.last()
690718
.expect("a child iterator should always have a lock depth");
691-
assert!(lock_depth <= self.reader.ctx.depth);
719+
assert!(lock_depth <= self.reader.ctx.depth as usize);
692720

693-
self.reader.ctx.masters[(lock_depth - 1) as usize].remaining_length == 0
721+
self.reader.ctx.remaining_lock_length() == 0
694722
}
695723
}
696724

lofty/src/ebml/read.rs

+4
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,8 @@ where
3333
// First we need to go through the elements in the EBML master element
3434
read_ebml_header(&mut element_reader, parse_options, &mut properties)?;
3535

36+
log::debug!("File verified to be EBML");
37+
3638
loop {
3739
let res = element_reader.next()?;
3840
match res {
@@ -70,6 +72,8 @@ fn read_ebml_header<R>(
7072
where
7173
R: Read + Seek,
7274
{
75+
log::trace!("Reading EBML header");
76+
7377
match element_reader.next() {
7478
Ok(ElementReaderYield::Master((ElementIdent::EBML, _))) => {},
7579
Ok(_) => decode_err!(@BAIL Ebml, "File does not start with an EBML master element"),

lofty/src/ebml/read/segment.rs

+1-1
Original file line numberDiff line numberDiff line change
@@ -79,7 +79,7 @@ where
7979
}
8080
},
8181
ElementReaderYield::Eof => break,
82-
_ => unreachable!("Unhandled child element in \\Ebml\\Segment: {child:?}"),
82+
_ => unreachable!("Unhandled child element in \\Segment: {child:?}"),
8383
}
8484
}
8585

0 commit comments

Comments
 (0)