Skip to content

Commit 4472ec1

Browse files
Jean-Yves Avenardkinetiknz
Jean-Yves Avenard
authored andcommitted
Properly parse AudioSpecificConfig for audioObjectType 5 and 29.
The previous code made assumptions that ignored whether a BSAC extension existed (such as HE-AAC with explicit (out-of-band) SBR signaling). The full syntax of the AudioSpecificConfig as per ISO 14496-3:2009 isn't implemented; still missing are the sync extensions (see 1.6.2.1). Not sure if those exist in the wild. We would still successfully parse those files, however, and leave it to the decoders to handle them. (BMO 1509875) Fixes #168
1 parent 8e7cf4c commit 4472ec1

File tree

2 files changed

+131
-56
lines changed

2 files changed

+131
-56
lines changed

mp4parse/src/lib.rs

+126-56
Original file line numberDiff line numberDiff line change
@@ -293,6 +293,7 @@ pub enum SampleEntry {
293293
pub struct ES_Descriptor {
294294
pub audio_codec: CodecType,
295295
pub audio_object_type: Option<u16>,
296+
pub extended_audio_object_type: Option<u16>,
296297
pub audio_sample_rate: Option<u32>,
297298
pub audio_channel_count: Option<u16>,
298299
pub codec_esds: Vec<u8>,
@@ -1520,6 +1521,17 @@ fn find_descriptor(data: &[u8], esds: &mut ES_Descriptor) -> Result<()> {
15201521
Ok(())
15211522
}
15221523

1524+
/// Reads an audio object type from `bit_reader`.
///
/// A 5-bit value is read first. If that value is 31, a further 6-bit value
/// is read and the result is `32 + <6-bit value>`; otherwise the 5-bit value
/// is returned unchanged.
///
/// Any error from the underlying bit reads is propagated to the caller via `?`.
fn get_audio_object_type(bit_reader: &mut BitReader) -> Result<u16> {
1525+
let mut audio_object_type: u16 = ReadInto::read(bit_reader, 5)?;
1526+
1527+
// 31 is treated as an escape value: the real object type is encoded in the
// next 6 bits, offset by 32.
1528+
if audio_object_type == 31 {
1529+
let audio_object_type_ext: u16 = ReadInto::read(bit_reader, 6)?;
1530+
audio_object_type = 32 + audio_object_type_ext;
1531+
}
1532+
Ok(audio_object_type)
1533+
}
1534+
15231535
fn read_ds_descriptor(data: &[u8], esds: &mut ES_Descriptor) -> Result<()> {
15241536
let frequency_table =
15251537
vec![(0x0, 96000), (0x1, 88200), (0x2, 64000), (0x3, 48000),
@@ -1529,13 +1541,7 @@ fn read_ds_descriptor(data: &[u8], esds: &mut ES_Descriptor) -> Result<()> {
15291541

15301542
let bit_reader = &mut BitReader::new(data);
15311543

1532-
let mut audio_object_type: u16 = ReadInto::read(bit_reader, 5)?;
1533-
1534-
// Extend audio object type, for example, HE-AAC.
1535-
if audio_object_type == 31 {
1536-
let audio_object_type_ext: u16 = ReadInto::read(bit_reader, 6)?;
1537-
audio_object_type = 32 + audio_object_type_ext;
1538-
}
1544+
let mut audio_object_type = get_audio_object_type(bit_reader)?;
15391545

15401546
let sample_index: u32 = ReadInto::read(bit_reader, 4)?;
15411547

@@ -1550,60 +1556,124 @@ fn read_ds_descriptor(data: &[u8], esds: &mut ES_Descriptor) -> Result<()> {
15501556
},
15511557
};
15521558

1553-
let mut channel_counts: u16 = ReadInto::read(bit_reader, 4)?;
1554-
1555-
// parsing GASpecificConfig
1556-
bit_reader.skip(1)?; // frameLengthFlag
1557-
let depend_on_core_order: u8 = ReadInto::read(bit_reader, 1)?;
1558-
if depend_on_core_order > 0 {
1559-
bit_reader.skip(14)?; // codeCoderDelay
1560-
}
1561-
bit_reader.skip(1)?; // extensionFlag
1562-
1563-
// When channel_counts is 0, we need to parse the program_config_element
1564-
// to calculate the channel counts.
1565-
if channel_counts == 0 {
1566-
debug!("Parsing program_config_element for channel counts");
1567-
1568-
bit_reader.skip(4)?; // element_instance_tag
1569-
bit_reader.skip(2)?; // object_type
1570-
bit_reader.skip(4)?; // sampling_frequency_index
1571-
let num_front_channel: u8 = ReadInto::read(bit_reader, 4)?;
1572-
let num_side_channel: u8 = ReadInto::read(bit_reader, 4)?;
1573-
let num_back_channel:u8 = ReadInto::read(bit_reader, 4)?;
1574-
let num_lfe_channel: u8 = ReadInto::read(bit_reader, 2)?;
1575-
bit_reader.skip(3)?; // num_assoc_data
1576-
bit_reader.skip(4)?; // num_valid_cc
1577-
1578-
let mono_mixdown_present: bool = ReadInto::read(bit_reader, 1)?;
1579-
if mono_mixdown_present {
1580-
bit_reader.skip(4)?; // mono_mixdown_element_number
1581-
}
1559+
let channel_configuration: u16 = ReadInto::read(bit_reader, 4)?;
15821560

1583-
let stereo_mixdown_present: bool = ReadInto::read(bit_reader, 1)?;
1584-
if stereo_mixdown_present {
1585-
bit_reader.skip(4)?; // stereo_mixdown_element_number
1586-
}
1561+
let extended_audio_object_type = match audio_object_type {
1562+
5 | 29 => Some(5),
1563+
_ => None,
1564+
};
15871565

1588-
let matrix_mixdown_idx_present: bool = ReadInto::read(bit_reader, 1)?;
1589-
if matrix_mixdown_idx_present {
1590-
bit_reader.skip(2)?; // matrix_mixdown_idx
1591-
bit_reader.skip(1)?; // pseudo_surround_enable
1592-
}
1566+
if audio_object_type == 5 || audio_object_type == 29 {
1567+
// We have an explicit signaling for BSAC extension, should the decoder
1568+
// decode the BSAC extension (all Gecko's AAC decoders do), then this is
1569+
// what the stream will actually look like once decoded.
1570+
let _extended_sample_index = ReadInto::read(bit_reader, 4)?;
1571+
let _extended_sample_frequency: Option<u32> = match _extended_sample_index {
1572+
0x0F => Some(ReadInto::read(bit_reader, 24)?),
1573+
_ => frequency_table.iter().find(|item| item.0 == sample_index).map(|x| x.1)
1574+
};
1575+
audio_object_type = get_audio_object_type(bit_reader)?;
1576+
let _extended_channel_configuration = match audio_object_type {
1577+
22 => ReadInto::read(bit_reader, 4)?,
1578+
_ => channel_configuration
1579+
};
1580+
};
15931581

1594-
channel_counts += read_surround_channel_count(bit_reader, num_front_channel)?;
1595-
channel_counts += read_surround_channel_count(bit_reader, num_side_channel)?;
1596-
channel_counts += read_surround_channel_count(bit_reader, num_back_channel)?;
1597-
channel_counts += read_surround_channel_count(bit_reader, num_lfe_channel)?;
1598-
}
1582+
match audio_object_type {
1583+
1 ... 4 | 6 | 7 | 17 | 19 ... 23 => {
1584+
if sample_frequency.is_none() {
1585+
return Err(Error::Unsupported("unknown frequency"));
1586+
}
15991587

1600-
esds.audio_object_type = Some(audio_object_type);
1601-
esds.audio_sample_rate = sample_frequency;
1602-
esds.audio_channel_count = Some(channel_counts);
1603-
assert!(esds.decoder_specific_data.is_empty());
1604-
esds.decoder_specific_data.extend_from_slice(data);
1588+
// parsing GASpecificConfig
1589+
1590+
// If the sampling rate is not one of the rates listed in the right
1591+
// column in Table 4.82, the sampling frequency dependent tables
1592+
// (code tables, scale factor band tables etc.) must be deduced in
1593+
// order for the bitstream payload to be parsed. Since a given
1594+
// sampling frequency is associated with only one sampling frequency
1595+
// table, and since maximum flexibility is desired in the range of
1596+
// possible sampling frequencies, the following table shall be used
1597+
// to associate an implied sampling frequency with the desired
1598+
// sampling frequency dependent tables.
1599+
let sample_frequency_value = match sample_frequency.unwrap() {
1600+
0 ... 9390 => 8000,
1601+
9391 ... 11501 => 11025,
1602+
11502 ... 13855 => 12000,
1603+
13856 ... 18782 => 16000,
1604+
18783 ... 23003 => 22050,
1605+
23004 ... 27712 => 24000,
1606+
27713 ... 37565 => 32000,
1607+
37566 ... 46008 => 44100,
1608+
46009 ... 55425 => 48000,
1609+
55426 ... 75131 => 64000,
1610+
75132 ... 92016 => 88200,
1611+
_ => 96000
1612+
};
16051613

1606-
Ok(())
1614+
bit_reader.skip(1)?; // frameLengthFlag
1615+
let depend_on_core_order: u8 = ReadInto::read(bit_reader, 1)?;
1616+
if depend_on_core_order > 0 {
1617+
bit_reader.skip(14)?; // codeCoderDelay
1618+
}
1619+
bit_reader.skip(1)?; // extensionFlag
1620+
1621+
let channel_counts = match channel_configuration {
1622+
0 => {
1623+
debug!("Parsing program_config_element for channel counts");
1624+
1625+
bit_reader.skip(4)?; // element_instance_tag
1626+
bit_reader.skip(2)?; // object_type
1627+
bit_reader.skip(4)?; // sampling_frequency_index
1628+
let num_front_channel: u8 = ReadInto::read(bit_reader, 4)?;
1629+
let num_side_channel: u8 = ReadInto::read(bit_reader, 4)?;
1630+
let num_back_channel:u8 = ReadInto::read(bit_reader, 4)?;
1631+
let num_lfe_channel: u8 = ReadInto::read(bit_reader, 2)?;
1632+
bit_reader.skip(3)?; // num_assoc_data
1633+
bit_reader.skip(4)?; // num_valid_cc
1634+
1635+
let mono_mixdown_present: bool = ReadInto::read(bit_reader, 1)?;
1636+
if mono_mixdown_present {
1637+
bit_reader.skip(4)?; // mono_mixdown_element_number
1638+
}
1639+
1640+
let stereo_mixdown_present: bool = ReadInto::read(bit_reader, 1)?;
1641+
if stereo_mixdown_present {
1642+
bit_reader.skip(4)?; // stereo_mixdown_element_number
1643+
}
1644+
1645+
let matrix_mixdown_idx_present: bool = ReadInto::read(bit_reader, 1)?;
1646+
if matrix_mixdown_idx_present {
1647+
bit_reader.skip(2)?; // matrix_mixdown_idx
1648+
bit_reader.skip(1)?; // pseudo_surround_enable
1649+
}
1650+
let mut _channel_counts = 0;
1651+
_channel_counts += read_surround_channel_count(bit_reader, num_front_channel)?;
1652+
_channel_counts += read_surround_channel_count(bit_reader, num_side_channel)?;
1653+
_channel_counts += read_surround_channel_count(bit_reader, num_back_channel)?;
1654+
_channel_counts += read_surround_channel_count(bit_reader, num_lfe_channel)?;
1655+
_channel_counts
1656+
},
1657+
1 ... 7 => channel_configuration,
1658+
// Amendment 4 of the AAC standard in 2013 below
1659+
11 => 7, // 6.1 Amendment 4 of the AAC standard in 2013
1660+
12 | 14 => 8, // 7.1 (a/d) of ITU BS.2159
1661+
_ => {
1662+
return Err(Error::Unsupported("invalid channel configuration"));
1663+
}
1664+
};
1665+
1666+
esds.audio_object_type = Some(audio_object_type);
1667+
esds.extended_audio_object_type = extended_audio_object_type;
1668+
esds.audio_sample_rate = Some(sample_frequency_value);
1669+
esds.audio_channel_count = Some(channel_counts);
1670+
assert!(esds.decoder_specific_data.is_empty());
1671+
esds.decoder_specific_data.extend_from_slice(data);
1672+
1673+
Ok(())
1674+
},
1675+
_ => Err(Error::Unsupported("unknown aac audio object type"))
1676+
}
16071677
}
16081678

16091679
fn read_surround_channel_count(bit_reader: &mut BitReader, channels: u8) -> Result<u16> {

mp4parse_capi/src/lib.rs

+5
Original file line numberDiff line numberDiff line change
@@ -205,6 +205,7 @@ pub struct Mp4parseTrackAudioSampleInfo {
205205
pub bit_depth: u16,
206206
pub sample_rate: u32,
207207
pub profile: u16,
208+
pub extended_profile: u16,
208209
pub codec_specific_config: Mp4parseByteData,
209210
pub extra_data: Mp4parseByteData,
210211
pub protected_data: Mp4parseSinfInfo,
@@ -600,6 +601,10 @@ pub unsafe extern fn mp4parse_get_track_audio_info(parser: *mut Mp4parseParser,
600601
if let Some(profile) = esds.audio_object_type {
601602
sample_info.profile = profile;
602603
}
604+
sample_info.extended_profile = match esds.extended_audio_object_type {
605+
Some(extended_profile) => extended_profile,
606+
_ => sample_info.profile
607+
};
603608
}
604609
AudioCodecSpecific::FLACSpecificBox(ref flac) => {
605610
// Return the STREAMINFO metadata block in the codec_specific.

0 commit comments

Comments
 (0)