Skip to content

Commit d553a78

Browse files
authored
Merge pull request #185 from Yoric/entropy-with-dictionary-builder
Towards a dictionary builder
2 parents 45ef914 + 9e857c2 commit d553a78

36 files changed

+959
-349
lines changed

.gitignore

+2
Original file line numberDiff line numberDiff line change
@@ -3,3 +3,5 @@ target/
33
Cargo.lock
44
node_modules/
55
*~
6+
**/.DS_Store
7+
.vscode/

Cargo.toml

+10-1
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@ binjs_meta = { path = "crates/binjs_meta", version = "*" }
1616
binjs_shared = { path = "crates/binjs_shared", version = "*" }
1717
assert_matches = "*"
1818
bencher = "^0.1"
19+
bincode = "*"
1920
clap = "*"
2021
env_logger = "^0.5"
2122
glob = "*"
@@ -27,7 +28,7 @@ lzw = "*"
2728
rand = "^0.4"
2829
test-logger = "*"
2930
vec_map = "*"
30-
webidl = "^0.7"
31+
webidl = "*"
3132
yaml-rust = "*"
3233

3334
[[bin]]
@@ -45,6 +46,14 @@ path = "src/bin/decode.rs"
4546
name = "binjs_dump"
4647
path = "src/bin/dump.rs"
4748

49+
[[bin]]
50+
# From a sample of JS source files, extract the distribution
51+
# of probabilities for all ASTs, write this distribution to
52+
# disk. The distribution may then be reused by binjs_encode
53+
# using the entropy format to encode further files.
54+
name = "binjs_generate_prediction_tables"
55+
path = "src/bin/generate_dictionary.rs"
56+
4857
[[bench]]
4958
name = "bench_fb"
5059
harness = false

crates/binjs_es6/Cargo.toml

+1-1
Original file line numberDiff line numberDiff line change
@@ -15,4 +15,4 @@ log = "*"
1515
[build-dependencies]
1616
binjs_generate_library = { path = "../binjs_generate_library/", version = "*" }
1717
binjs_meta = { path = "../binjs_meta/", version = "*" }
18-
webidl = "^0.7"
18+
webidl = "^0.8"

crates/binjs_es6/src/io.rs

+27-27
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
use binjs_io::{ self, Deserialization, Guard, TokenReader, TokenReaderError, TokenWriterError };
1+
use binjs_io::{ self, Deserialization, Guard, TokenReader, TokenReaderError, TokenWriterTreeAdapter, TokenWriterError };
22
pub use binjs_io::{ Serialization, TokenSerializer, TokenWriter };
33
use binjs_shared::{ FieldName, IdentifierName, InterfaceName, Offset, PropertyKey, SharedString, self };
44

@@ -131,7 +131,7 @@ impl<W> Serializer<W> where W: TokenWriter {
131131
writer
132132
}
133133
}
134-
pub fn serialize<T>(&mut self, value: T, path: &mut IOPath) -> Result<W:: Tree, TokenWriterError> where Self: Serialization<W, T> {
134+
pub fn serialize<T>(&mut self, value: T, path: &mut IOPath) -> Result<(), TokenWriterError> where Self: Serialization<W, T> {
135135
(self as &mut Serialization<W, T>).serialize(value, path)
136136
}
137137
}
@@ -143,99 +143,99 @@ impl<W> TokenSerializer<W> for Serializer<W> where W: TokenWriter {
143143
}
144144

145145
impl<W> Serialization<W, Option<bool>> for Serializer<W> where W: TokenWriter {
146-
fn serialize(&mut self, value: Option<bool>, path: &mut IOPath) -> Result<W::Tree, TokenWriterError> {
146+
fn serialize(&mut self, value: Option<bool>, path: &mut IOPath) -> Result<(), TokenWriterError> {
147147
self.writer.bool_at(value, path)
148148
}
149149
}
150150
impl<W> Serialization<W, bool> for Serializer<W> where W: TokenWriter {
151-
fn serialize(&mut self, value: bool, path: &mut IOPath) -> Result<W::Tree, TokenWriterError> {
151+
fn serialize(&mut self, value: bool, path: &mut IOPath) -> Result<(), TokenWriterError> {
152152
self.writer.bool_at(Some(value), path)
153153
}
154154
}
155155
impl<W> Serialization<W, Option<f64>> for Serializer<W> where W: TokenWriter {
156-
fn serialize(&mut self, value: Option<f64>, path: &mut IOPath) -> Result<W::Tree, TokenWriterError> {
156+
fn serialize(&mut self, value: Option<f64>, path: &mut IOPath) -> Result<(), TokenWriterError> {
157157
self.writer.float_at(value, path)
158158
}
159159
}
160160
impl<W> Serialization<W, f64> for Serializer<W> where W: TokenWriter {
161-
fn serialize(&mut self, value: f64, path: &mut IOPath) -> Result<W::Tree, TokenWriterError> {
161+
fn serialize(&mut self, value: f64, path: &mut IOPath) -> Result<(), TokenWriterError> {
162162
self.writer.float_at(Some(value), path)
163163
}
164164
}
165165
impl<W> Serialization<W, u32> for Serializer<W> where W: TokenWriter {
166-
fn serialize(&mut self, value: u32, path: &mut IOPath) -> Result<W::Tree, TokenWriterError> {
166+
fn serialize(&mut self, value: u32, path: &mut IOPath) -> Result<(), TokenWriterError> {
167167
self.writer.unsigned_long_at(value, path)
168168
}
169169
}
170170
impl<'a, W> Serialization<W, &'a Option<bool>> for Serializer<W> where W: TokenWriter {
171-
fn serialize(&mut self, value: &'a Option<bool>, path: &mut IOPath) -> Result<W::Tree, TokenWriterError> {
171+
fn serialize(&mut self, value: &'a Option<bool>, path: &mut IOPath) -> Result<(), TokenWriterError> {
172172
self.writer.bool_at(value.clone(), path)
173173
}
174174
}
175175
impl<'a, W> Serialization<W, &'a bool> for Serializer<W> where W: TokenWriter {
176-
fn serialize(&mut self, value: &'a bool, path: &mut IOPath) -> Result<W::Tree, TokenWriterError> {
176+
fn serialize(&mut self, value: &'a bool, path: &mut IOPath) -> Result<(), TokenWriterError> {
177177
self.writer.bool_at(Some(*value), path)
178178
}
179179
}
180180
impl<'a, W> Serialization<W, &'a Option<f64>> for Serializer<W> where W: TokenWriter {
181-
fn serialize(&mut self, value: &'a Option<f64>, path: &mut IOPath) -> Result<W::Tree, TokenWriterError> {
181+
fn serialize(&mut self, value: &'a Option<f64>, path: &mut IOPath) -> Result<(), TokenWriterError> {
182182
self.writer.float_at(value.clone(), path)
183183
}
184184
}
185185
impl<'a, W> Serialization<W, &'a f64> for Serializer<W> where W: TokenWriter {
186-
fn serialize(&mut self, value: &'a f64, path: &mut IOPath) -> Result<W::Tree, TokenWriterError> {
186+
fn serialize(&mut self, value: &'a f64, path: &mut IOPath) -> Result<(), TokenWriterError> {
187187
self.writer.float_at(Some(*value), path)
188188
}
189189
}
190190
impl<'a, W> Serialization<W, &'a u32> for Serializer<W> where W: TokenWriter {
191-
fn serialize(&mut self, value: &'a u32, path: &mut IOPath) -> Result<W::Tree, TokenWriterError> {
191+
fn serialize(&mut self, value: &'a u32, path: &mut IOPath) -> Result<(), TokenWriterError> {
192192
self.writer.unsigned_long_at(value.clone(), path)
193193
}
194194
}
195195
/*
196196
impl<'a, W> Serialization<W, Option<&'a str>> for Serializer<W> where W: TokenWriter {
197-
fn serialize(&mut self, value: Option<&'a str>, path: &mut IOPath) -> Result<W::Tree, TokenWriterError> {
197+
fn serialize(&mut self, value: Option<&'a str>, path: &mut IOPath) -> Result<(), TokenWriterError> {
198198
self.writer.string_at(value, path)
199199
}
200200
}
201201
impl<'a, W> Serialization<W, &'a str> for Serializer<W> where W: TokenWriter {
202-
fn serialize(&mut self, value: &'a str, path: &mut IOPath) -> Result<W::Tree, TokenWriterError> {
202+
fn serialize(&mut self, value: &'a str, path: &mut IOPath) -> Result<(), TokenWriterError> {
203203
self.writer.string_at(Some(value), path)
204204
}
205205
}
206206
*/
207207
impl<'a, W> Serialization<W, &'a SharedString> for Serializer<W> where W: TokenWriter {
208-
fn serialize(&mut self, value: &'a SharedString, path: &mut IOPath) -> Result<W::Tree, TokenWriterError> {
208+
fn serialize(&mut self, value: &'a SharedString, path: &mut IOPath) -> Result<(), TokenWriterError> {
209209
self.writer.string_at(Some(value), path)
210210
}
211211
}
212212
impl<'a, W> Serialization<W, &'a Option<SharedString>> for Serializer<W> where W: TokenWriter {
213-
fn serialize(&mut self, value: &'a Option<SharedString>, path: &mut IOPath) -> Result<W::Tree, TokenWriterError> {
213+
fn serialize(&mut self, value: &'a Option<SharedString>, path: &mut IOPath) -> Result<(), TokenWriterError> {
214214
self.writer.string_at(value.as_ref(), path)
215215
}
216216
}
217217
impl<'a, W> Serialization<W, &'a IdentifierName> for Serializer<W> where W: TokenWriter {
218-
fn serialize(&mut self, value: &'a IdentifierName, path: &mut IOPath) -> Result<W::Tree, TokenWriterError> {
218+
fn serialize(&mut self, value: &'a IdentifierName, path: &mut IOPath) -> Result<(), TokenWriterError> {
219219
self.writer.identifier_name_at(Some(&value), path)
220220
}
221221
}
222222
impl<'a, W> Serialization<W, &'a PropertyKey> for Serializer<W> where W: TokenWriter {
223-
fn serialize(&mut self, value: &'a PropertyKey, path: &mut IOPath) -> Result<W::Tree, TokenWriterError> {
223+
fn serialize(&mut self, value: &'a PropertyKey, path: &mut IOPath) -> Result<(), TokenWriterError> {
224224
self.writer.property_key_at(Some(&value), path)
225225
}
226226
}
227227
impl<'a, W> Serialization<W, &'a Option<IdentifierName>> for Serializer<W> where W: TokenWriter {
228-
fn serialize(&mut self, value: &'a Option<IdentifierName>, path: &mut IOPath) -> Result<W::Tree, TokenWriterError> {
228+
fn serialize(&mut self, value: &'a Option<IdentifierName>, path: &mut IOPath) -> Result<(), TokenWriterError> {
229229
self.writer.identifier_name_at(value.as_ref(), path)
230230
}
231231
}
232232
impl<'a, W> Serialization<W, &'a Option<PropertyKey>> for Serializer<W> where W: TokenWriter {
233-
fn serialize(&mut self, value: &'a Option<PropertyKey>, path: &mut IOPath) -> Result<W::Tree, TokenWriterError> {
233+
fn serialize(&mut self, value: &'a Option<PropertyKey>, path: &mut IOPath) -> Result<(), TokenWriterError> {
234234
self.writer.property_key_at(value.as_ref(), path)
235235
}
236236
}
237237
impl<'a, W> Serialization<W, &'a Offset> for Serializer<W> where W: TokenWriter {
238-
fn serialize(&mut self, _: &'a Offset, path: &mut IOPath) -> Result<W::Tree, TokenWriterError> {
238+
fn serialize(&mut self, _: &'a Offset, path: &mut IOPath) -> Result<(), TokenWriterError> {
239239
self.writer.offset_at(path)
240240
}
241241
}
@@ -275,9 +275,9 @@ impl Encoder {
275275
}
276276
pub fn encode<'a, AST>(&self, format: &'a mut binjs_io::Format, ast: &'a AST) -> Result<Box<AsRef<[u8]>>, TokenWriterError>
277277
where
278-
Serializer<binjs_io::simple::TreeTokenWriter> : Serialization<binjs_io::simple::TreeTokenWriter, &'a AST>,
279-
Serializer<binjs_io::multipart::TreeTokenWriter> : Serialization<binjs_io::multipart::TreeTokenWriter, &'a AST>,
280-
Serializer<binjs_io::xml::Encoder> : Serialization<binjs_io::xml::Encoder, &'a AST>,
278+
Serializer<TokenWriterTreeAdapter<binjs_io::simple::TreeTokenWriter>> : Serialization<TokenWriterTreeAdapter<binjs_io::simple::TreeTokenWriter>, &'a AST>,
279+
Serializer<TokenWriterTreeAdapter<binjs_io::multipart::TreeTokenWriter>> : Serialization<TokenWriterTreeAdapter<binjs_io::multipart::TreeTokenWriter>, &'a AST>,
280+
Serializer<TokenWriterTreeAdapter<binjs_io::xml::Encoder>> : Serialization<TokenWriterTreeAdapter<binjs_io::xml::Encoder>, &'a AST>,
281281
// Serializer<binjs_io::entropy::write::TreeTokenWriter<'a>> : Serialization<binjs_io::entropy::write::TreeTokenWriter<'a>, &'a AST>
282282
/*
283283
#[cfg(multistream)]
@@ -290,14 +290,14 @@ impl Encoder {
290290
match *format {
291291
binjs_io::Format::Simple { .. } => {
292292
let writer = binjs_io::simple::TreeTokenWriter::new();
293-
let mut serializer = Serializer::new(writer);
293+
let mut serializer = Serializer::new(TokenWriterTreeAdapter::new(writer));
294294
serializer.serialize(ast, &mut path)?;
295295
let (data, _) = serializer.done()?;
296296
Ok(Box::new(data))
297297
}
298298
binjs_io::Format::Multipart { ref mut targets, .. } => {
299299
let writer = binjs_io::multipart::TreeTokenWriter::new(targets.clone());
300-
let mut serializer = Serializer::new(writer);
300+
let mut serializer = Serializer::new(TokenWriterTreeAdapter::new(writer));
301301
serializer.serialize(ast, &mut path)?;
302302
let (data, _) = serializer.done()?;
303303
Ok(Box::new(data))
@@ -323,7 +323,7 @@ impl Encoder {
323323
}
324324
binjs_io::Format::XML => {
325325
let writer = binjs_io::xml::Encoder::new();
326-
let mut serializer = Serializer::new(writer);
326+
let mut serializer = Serializer::new(TokenWriterTreeAdapter::new(writer));
327327
serializer.serialize(ast, &mut path)?;
328328
let (data, _) = serializer.done()?;
329329
Ok(Box::new(data))

crates/binjs_generate_library/Cargo.toml

+1-1
Original file line numberDiff line numberDiff line change
@@ -11,5 +11,5 @@ log = "^0.4"
1111

1212
[dev-dependencies]
1313
clap = "^2"
14-
webidl = "^0.7"
14+
webidl = "*"
1515
yaml-rust = "^0.4"

crates/binjs_generate_library/src/lib.rs

+26-28
Original file line numberDiff line numberDiff line change
@@ -275,7 +275,7 @@ impl FromJSON for {name} {{
275275

276276
let to_writer = format!("
277277
impl<'a, W> Serialization<W, &'a {name}> for Serializer<W> where W: TokenWriter {{
278-
fn serialize(&mut self, value: &'a {name}, path: &mut IOPath) -> Result<W::Tree, TokenWriterError> {{
278+
fn serialize(&mut self, value: &'a {name}, path: &mut IOPath) -> Result<(), TokenWriterError> {{
279279
debug!(target: \"serialize_es6\", \"Serializing string enum {name}\");
280280
let str = match *value {{
281281
{variants}
@@ -574,22 +574,21 @@ impl ToJSON for {name} {{
574574

575575
let to_writer = format!("
576576
impl<'a, W> Serialization<W, &'a Option<{name}>> for Serializer<W> where W: TokenWriter {{
577-
fn serialize(&mut self, value: &'a Option<{name}>, path: &mut IOPath) -> Result<W::Tree, TokenWriterError> {{
577+
fn serialize(&mut self, value: &'a Option<{name}>, path: &mut IOPath) -> Result<(), TokenWriterError> {{
578578
debug!(target: \"serialize_es6\", \"Serializing optional sum {name}\");
579579
match *value {{
580580
None => {{
581581
let interface_name = InterfaceName::from_str(\"{null}\");
582-
self.writer.enter_tagged_tuple_at(&interface_name, 0, path)?;
583-
let result = self.writer.tagged_tuple_at(&interface_name, &[], path)?;
584-
self.writer.exit_tagged_tuple_at(&interface_name, path)?;
585-
Ok(result)
582+
self.writer.enter_tagged_tuple_at(&interface_name, &[], path)?;
583+
self.writer.exit_tagged_tuple_at(&interface_name, &[], path)?;
584+
Ok(())
586585
}}
587586
Some(ref sum) => (self as &mut Serialization<W, &'a {name}>).serialize(sum, path)
588587
}}
589588
}}
590589
}}
591590
impl<'a, W> Serialization<W, &'a {name}> for Serializer<W> where W: TokenWriter {{
592-
fn serialize(&mut self, value: &'a {name}, path: &mut IOPath) -> Result<W::Tree, TokenWriterError> {{
591+
fn serialize(&mut self, value: &'a {name}, path: &mut IOPath) -> Result<(), TokenWriterError> {{
593592
debug!(target: \"serialize_es6\", \"Serializing sum {name}\");
594593
match *value {{
595594
{variants}
@@ -841,17 +840,15 @@ impl<'a> Walker<'a> for ViewMut{name}<'a> {{
841840
842841
843842
impl<'a, W> Serialization<W, &'a {name}> for Serializer<W> where W: TokenWriter {{
844-
fn serialize(&mut self, value: &'a {name}, path: &mut IOPath) -> Result<W::Tree, TokenWriterError> {{
843+
fn serialize(&mut self, value: &'a {name}, path: &mut IOPath) -> Result<(), TokenWriterError> {{
845844
debug!(target: \"serialize_es6\", \"Serializing list {name}\");
846845
self.writer.enter_list_at(value.len(), path)?;
847-
let mut children = Vec::with_capacity(value.len());
848846
for child in value {{
849847
// All the children of the list share the same path.
850-
children.push(self.serialize(child, path)?);
848+
self.serialize(child, path)?;
851849
}}
852-
let result = self.writer.list_at(children, path)?;
853850
self.writer.exit_list_at(path)?;
854-
Ok(result)
851+
Ok(())
855852
}}
856853
}}
857854
",
@@ -1059,43 +1056,44 @@ impl<R> Deserialization<R, Option<{name}>> for Deserializer<R> where R: TokenRea
10591056
.len();
10601057
let to_writer = format!("
10611058
impl<'a, W> Serialization<W, &'a Option<{name}>> for Serializer<W> where W: TokenWriter {{
1062-
fn serialize(&mut self, value: &'a Option<{name}>, path: &mut IOPath) -> Result<W::Tree, TokenWriterError> {{
1059+
fn serialize(&mut self, value: &'a Option<{name}>, path: &mut IOPath) -> Result<(), TokenWriterError> {{
10631060
debug!(target: \"serialize_es6\", \"Serializing optional tagged tuple {name}\");
10641061
match *value {{
10651062
None => {{
10661063
let interface_name = InterfaceName::from_str(\"{null}\");
1067-
self.writer.enter_tagged_tuple_at(&interface_name, 0, path)?;
1068-
let result = self.writer.tagged_tuple_at(&interface_name, &[], path)?;
1069-
self.writer.exit_tagged_tuple_at(&interface_name, path)?;
1070-
Ok(result)
1064+
self.writer.enter_tagged_tuple_at(&interface_name, &[], path)?;
1065+
self.writer.exit_tagged_tuple_at(&interface_name, &[], path)?;
1066+
Ok(())
10711067
}}
10721068
Some(ref sum) => (self as &mut Serialization<W, &'a {name}>).serialize(sum, path)
10731069
}}
10741070
}}
10751071
}}
10761072
impl<'a, W> Serialization<W, &'a {name}> for Serializer<W> where W: TokenWriter {{
1077-
fn serialize(&mut self, {value}: &'a {name}, path: &mut IOPath) -> Result<W::Tree, TokenWriterError> {{
1073+
fn serialize(&mut self, {value}: &'a {name}, path: &mut IOPath) -> Result<(), TokenWriterError> {{
10781074
debug!(target: \"serialize_es6\", \"Serializing tagged tuple {name}\");
10791075
let interface_name = InterfaceName::from_str(\"{name}\"); // String is shared
1076+
let field_names = [{field_names}];
10801077
1081-
self.writer.enter_tagged_tuple_at(&interface_name, {len}, path)?;
1078+
self.writer.enter_tagged_tuple_at(&interface_name, &field_names, path)?;
10821079
path.enter_interface(interface_name.clone());
1083-
let {mut} children = Vec::with_capacity({len});
10841080
{fields}
1085-
let result = self.writer.{tagged_tuple}(&interface_name, &children, path);
10861081
path.exit_interface(interface_name.clone());
1087-
self.writer.exit_tagged_tuple_at(&interface_name, path)?;
1082+
self.writer.exit_tagged_tuple_at(&interface_name, &field_names, path)?;
10881083
1089-
result
1084+
Ok(())
10901085
}}
10911086
}}
10921087
",
1093-
mut = if len > 0 { "mut" } else { "" },
10941088
value = if len > 0 { "value" } else { "_" },
10951089
null = null_name,
10961090
name = name,
1097-
len = len,
1098-
tagged_tuple = if interface.is_scope() { "tagged_scoped_tuple_at" } else { "tagged_tuple_at" },
1091+
field_names = interface.contents()
1092+
.fields()
1093+
.iter()
1094+
.map(|field| format!("&FieldName::from_str(\"{field_name}\")",
1095+
field_name = field.name().to_str()))
1096+
.format(", "),
10991097
fields = interface.contents()
11001098
.fields()
11011099
.iter()
@@ -1105,9 +1103,9 @@ impl<'a, W> Serialization<W, &'a {name}> for Serializer<W> where W: TokenWriter
11051103
let field_name = FieldName::from_str(\"{field_name}\");
11061104
let path_item = ({index}, field_name.clone()); // String is shared
11071105
path.enter_field(path_item.clone());
1108-
let child = (self as &mut Serialization<W, &'a _>).serialize(&value.{rust_field_name}, path);
1106+
let result = (self as &mut Serialization<W, &'a _>).serialize(&value.{rust_field_name}, path);
11091107
path.exit_field(path_item);
1110-
children.push((field_name, child?));",
1108+
result?;",
11111109
index = index,
11121110
field_name = field.name().to_str(),
11131111
rust_field_name = field.name().to_rust_identifier_case()))

crates/binjs_generic/Cargo.toml

+1-1
Original file line numberDiff line numberDiff line change
@@ -26,4 +26,4 @@ clap = "^2.30"
2626
[build-dependencies]
2727
binjs_generate_library = { path = "../binjs_generate_library", version = "*" }
2828
binjs_meta = { path = "../binjs_meta", version = "*" }
29-
webidl = "^0.7"
29+
webidl = "^0.8"

0 commit comments

Comments
 (0)