Skip to content

Commit 76e4603

Browse files
committed
version bump 1.2.0: MAD
1 parent 1a1920c commit 76e4603

29 files changed

+1849
-90
lines changed

.gitignore

+1
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@ test_files_pres
2727
*.[eE][tT][hH]
2828
*.[zZ][iI][pP]
2929
*.[mM][sS][iIgG]
30+
*.[mM][hH][tT]
3031
*.123
3132
*.htm
3233
*.html

.travis.yml

+8-2
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,8 @@
11
language: node_js
2+
dist: xenial
23
node_js:
4+
- "14"
5+
- "13"
36
- "12"
47
- "11"
58
- "10"
@@ -13,12 +16,15 @@ node_js:
1316
- "0.10"
1417
- "0.8"
1518
before_install:
16-
- "npm install -g npm@4.3.0"
19+
- "npm config set strict-ssl false"
20+
- "./misc/node_version.sh"
1721
- "npm install -g mocha@2.x voc"
1822
- "npm install blanket"
19-
- "npm install xlsjs crc-32"
23+
- "npm install word crc-32"
2024
- "npm install coveralls mocha-lcov-reporter"
2125
before_script:
2226
- "make init"
27+
install:
28+
- npm install
2329
after_success:
2430
- "make coveralls-spin"

CHANGELOG.md

+5
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,11 @@ This log is intended to keep track of backwards-incompatible changes, including
44
but not limited to API changes and file location changes. Minor behavioral
55
changes may not be included if they are not expected to break existing code.
66

7+
## 1.2.0 (2020-07-09)
8+
9+
* Support for MAD file format (MIME aggregate document)
10+
* Spun off the CLI tool to the `cfb-cli` module
11+
712
## 1.1.0 (2018-09-04)
813

914
* Support for ZIP file format

Makefile

+1-1
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@ FMT=xls doc ppt misc full
44
REQS=
55
ADDONS=
66
AUXTARGETS=xlscfb.js
7-
CMDS=bin/cfb.njs
7+
CMDS=packages/cfb-cli/bin/cfb.njs
88
HTMLLINT=index.html
99

1010
ULIB=$(shell echo $(LIB) | tr a-z A-Z)

README.md

+10-26
Original file line numberDiff line numberDiff line change
@@ -44,28 +44,8 @@ var data = workbook.content;
4444

4545
## Command-Line Utility Usage
4646

47-
It is preferable to install the library globally with npm:
48-
49-
```bash
50-
$ npm install -g cfb
51-
```
52-
53-
The global installation adds a command `cfb` which can work with files:
54-
55-
- `cfb file [names...]` extracts the contents of the file. If additional names
56-
are supplied, only the listed files will be extracted.
57-
58-
- `cfb -l file` lists the contained files (following `unzip -l` "short format")
59-
60-
- `cfb -r file` attempts to repair by reading and re-writing the file.
61-
This fixes some issues with files generated by non-standard tools.
62-
63-
- `cfb -c file [files...]` creates a new file containing the listed files.
64-
The default root entry name is `Root Entry`.
65-
66-
- `cfb -a file [files...]` adds the listed files to the original file.
67-
68-
- `cfb -d file [files...]` deletes the listed files from the original file.
47+
The [`cfb-cli`](https://www.npmjs.com/package/cfb-cli) module ships with a CLI
48+
tool for manipulating and inspecting supported files.
6949

7050

7151
## JS API
@@ -113,10 +93,11 @@ name, if there are no slashes) and returns an entry object or null if not found.
11393

11494
`opts.fileType` controls the output file type:
11595

116-
| `fileType` | output |
117-
|:-------------------|:--------------|
118-
| `'cfb'` (default) | CFB container |
119-
| `'zip'` | ZIP file |
96+
| `fileType` | output |
97+
|:-------------------|:------------------------|
98+
| `'cfb'` (default) | CFB container |
99+
| `'zip'` | ZIP file |
100+
| `'mad'` | MIME aggregate document |
120101

121102
`opts.compression` enables DEFLATE compression for ZIP file type.
122103

@@ -157,6 +138,7 @@ interface CFBEntry {
157138
content: Buffer | number[] | Uint8Array; /** Raw Content */
158139
ct?: Date; /** Creation Time */
159140
mt?: Date; /** Modification Time */
141+
ctype?: String; /** Content-Type (for MAD) */
160142
}
161143
```
162144

@@ -172,4 +154,6 @@ granted by the Apache 2.0 License are reserved by the Original Author.
172154
- `MS-CFB`: Compound File Binary File Format
173155
- ZIP `APPNOTE.TXT`: .ZIP File Format Specification
174156
- RFC1951: https://www.ietf.org/rfc/rfc1951.txt
157+
- RFC2045: https://www.ietf.org/rfc/rfc2045.txt
158+
- RFC2557: https://www.ietf.org/rfc/rfc2557.txt
175159

bits/05_buf.js

+1-1
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@ function new_unsafe_buf(len/*:number*/) {
2424
/* jshint +W056 */
2525
}
2626

27-
var s2a = function s2a(s/*:string*/)/*:any*/ {
27+
var s2a = function s2a(s/*:string*/)/*:RawBytes*/ {
2828
if(has_buf) return Buffer_from(s, "binary");
2929
return s.split("").map(function(x/*:string*/)/*:number*/{ return x.charCodeAt(0) & 0xff; });
3030
};

bits/31_version.js

+1-1
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
exports.version = '1.1.4';
1+
exports.version = '1.2.0';

bits/40_parse.js

+1
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
function parse(file/*:RawBytes*/, options/*:CFBReadOpts*/)/*:CFBContainer*/ {
22
if(file[0] == 0x50 && file[1] == 0x4b) return parse_zip(file, options);
3+
if((file[0] | 0x20) == 0x6d && (file[1]|0x20) == 0x69) return parse_mad(file, options);
34
if(file.length < 512) throw new Error("CFB file size " + file.length + " < 512");
45
var mver = 3;
56
var ssz = 512;

bits/60_writehead.js

+7-2
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,9 @@
1-
function _write(cfb/*:CFBContainer*/, options/*:CFBWriteOpts*/)/*:RawBytes*/ {
1+
function _write(cfb/*:CFBContainer*/, options/*:CFBWriteOpts*/)/*:RawBytes|string*/ {
22
var _opts = options || {};
3+
/* MAD is order-sensitive, skip rebuild and sort */
4+
if(_opts.fileType == 'mad') return write_mad(cfb, _opts);
35
rebuild_cfb(cfb);
4-
if(_opts.fileType == 'zip') return write_zip(cfb, _opts);
6+
switch(_opts.fileType) {
7+
case 'zip': return write_zip(cfb, _opts);
8+
//case 'mad': return write_mad(cfb, _opts);
9+
}

bits/77_writeutils.js

+6-3
Original file line numberDiff line numberDiff line change
@@ -13,10 +13,13 @@ function a2s(o/*:RawBytes*/)/*:string*/ {
1313

1414
function write(cfb/*:CFBContainer*/, options/*:CFBWriteOpts*/)/*:RawBytes|string*/ {
1515
var o = _write(cfb, options);
16-
switch(options && options.type) {
16+
switch(options && options.type || "buffer") {
1717
case "file": get_fs(); fs.writeFileSync(options.filename, (o/*:any*/)); return o;
18-
case "binary": return a2s(o);
19-
case "base64": return Base64.encode(a2s(o));
18+
case "binary": return typeof o == "string" ? o : a2s(o);
19+
case "base64": return Base64.encode(typeof o == "string" ? o : a2s(o));
20+
case "buffer": if(has_buf) return Buffer.isBuffer(o) ? o : Buffer_from(o);
21+
/* falls through */
22+
case "array": return typeof o == "string" ? s2a(o) : o;
2023
}
2124
return o;
2225
}

bits/84_mht.js

+206
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,206 @@
1+
/* Default Content-Type values keyed by file extension (without the dot) */
var ContentTypeMap = ({
	"htm": "text/html",
	"xml": "text/xml",

	"gif": "image/gif",
	"jpg": "image/jpeg",
	"png": "image/png",

	"mso": "application/x-mso",
	"thmx": "application/vnd.ms-officetheme",
	"sh33tj5": "application/octet-stream"
}/*:any*/);

/*
Determine the Content-Type header value for an entry.

- fi: entry; an explicit `fi.ctype` always wins, otherwise the extension of
  `fi.name` is looked up in ContentTypeMap
- fp: optional path of the entry; its trailing segment (after the last "." or
  "\") is looked up when the entry name gave no result

Returns "application/octet-stream" when nothing matches.
*/
function get_content_type(fi/*:CFBEntry*/, fp/*:string*/)/*:string*/ {
	if(fi.ctype) return fi.ctype;

	/* own-property check so that names like "x.constructor" do not resolve to Object.prototype members */
	var known = function(key/*:string*/)/*:boolean*/ {
		return Object.prototype.hasOwnProperty.call(ContentTypeMap, key) && !!ContentTypeMap[key];
	};

	var ext = fi.name || "", m = ext.match(/\.([^\.]+)$/);
	if(m && known(m[1])) return ContentTypeMap[m[1]];

	if(fp) {
		/* the quantifier must sit inside the group, otherwise only the last character is captured */
		m = (ext = fp).match(/[\.\\]([^\.\\]+)$/);
		if(m && known(m[1])) return ContentTypeMap[m[1]];
	}

	return "application/octet-stream";
}
27+
28+
/*
Base64-encode a binary string and wrap the result into rows of at most 76
characters joined by CRLF; the returned text always ends with a CRLF.
TODO: intertwine encoding
*/
function write_base64_76(bstr/*:string*/)/*:string*/ {
	var encoded = Base64.encode(bstr);
	var rows = [], offset = 0;
	while(offset < encoded.length) {
		rows.push(encoded.slice(offset, offset + 76));
		offset += 76;
	}
	return rows.join("\r\n") + "\r\n";
}
35+
36+
/*
Rules for QP:
- escape =## applies for all non-display characters and literal "="
- space or tab at end of line must be encoded
- \r\n newlines can be preserved, but bare \r and \n must be escaped
- lines must not exceed 76 characters (the "=" of a soft break counts towards
  the limit), use soft breaks =\r\n

TODO: Some files from word appear to write line extensions with bare equals:

```
<table class=3DMsoTableGrid border=3D1 cellspacing=3D0 cellpadding=3D0 width=
="70%"
```
*/
/* Encode a binary string as quoted-printable text with CRLF line endings */
function write_quoted_printable(text/*:string*/)/*:string*/ {
	/* escape control characters, "~", bytes 0x7F-0xFF and the literal "=" */
	var encoded = text.replace(/[\x00-\x08\x0B\x0C\x0E-\x1F\x7E-\xFF=]/g, function(c) {
		var w = c.charCodeAt(0).toString(16).toUpperCase();
		return "=" + (w.length == 1 ? "0" + w : w);
	});

	/* whitespace directly before a line end would be stripped in transit */
	encoded = encoded.replace(/ $/mg, "=20").replace(/\t$/mg, "=09");

	/* a leading bare LF has no preceding character for the patterns below; it is a LF (0x0A), not a CR */
	if(encoded.charAt(0) == "\n") encoded = "=0A" + encoded.slice(1);
	encoded = encoded.replace(/\r(?!\n)/mg, "=0D").replace(/\n\n/mg, "\n=0A").replace(/([^\r\n])\n/mg, "$1=0A");

	var o/*:Array<string>*/ = [], split = encoded.split("\r\n");
	for(var si = 0; si < split.length; ++si) {
		var str = split[si];
		if(str.length == 0) { o.push(""); continue; }
		for(var i = 0; i < str.length;) {
			/* the remainder fits on one line: no soft break needed */
			if(str.length - i <= 76) { o.push(str.slice(i)); break; }
			/* reserve one column for the soft break "=" so the line stays within 76 */
			var end = 75;
			/* every "=" at this point starts an =XX escape; never split one across lines */
			if(str.charAt(i + end - 1) == "=") end -= 1;
			else if(str.charAt(i + end - 2) == "=") end -= 2;
			o.push(str.slice(i, i + end) + "=");
			i += end;
		}
	}

	return o.join("\r\n");
}
80+
/*
Decode quoted-printable content.

- data: the content split into lines (without the CRLF terminators)

Lines ending in "=" (soft breaks) are glued to the following line, =XX escapes
are replaced by the corresponding byte, and the remaining hard line breaks are
restored as CRLF before the text is converted with s2a.
*/
function parse_quoted_printable(data/*:Array<string>*/)/*:RawBytes*/ {
	var o = [];

	/* unify long lines */
	for(var di = 0; di < data.length; ++di) {
		var line = data[di];
		while(line.charAt(line.length - 1) == "=") {
			line = line.slice(0, line.length - 1);
			/* a soft break on the very last line has nothing to join; do not read past the array */
			if(di + 1 >= data.length) break;
			line += data[++di];
		}
		o.push(line);
	}

	/* decode */
	for(var oi = 0; oi < o.length; ++oi) o[oi] = o[oi].replace(/=[0-9A-Fa-f]{2}/g, function($$) { return String.fromCharCode(parseInt($$.slice(1), 16)); });
	return s2a(o.join("\r\n"));
}
94+
95+
96+
/*
Parse one MIME part and add it to the container.

- cfb: container receiving the entry (via cfb_add)
- data: lines of the part; header lines are read from the start until the first
  blank line (at most the first 10 lines are inspected)
- root: prefix removed from the Content-Location value to form the entry name

Throws when the Content-Transfer-Encoding is neither base64 nor
quoted-printable. A Content-Type header, when present, is stored as `ctype`.
*/
function parse_mime(cfb/*:CFBContainer*/, data/*:Array<string>*/, root/*:string*/)/*:void*/ {
	var location = "", encoding = "", mimetype = "";

	/* header section */
	var idx = 0;
	while(idx < 10) {
		var row = data[idx];
		if(!row || /^\s*$/.test(row)) break;
		var kv = row.match(/^(.*?):\s*([^\s].*)$/);
		if(kv) {
			var key = kv[1].toLowerCase();
			if(key == "content-location") location = kv[2].trim();
			else if(key == "content-type") mimetype = kv[2].trim();
			else if(key == "content-transfer-encoding") encoding = kv[2].trim();
		}
		++idx;
	}

	/* the body starts after the line that ended the header scan */
	var bodyStart = idx + 1;
	var scheme = encoding.toLowerCase();
	var content;
	if(scheme == 'base64') content = s2a(Base64.decode(data.slice(bodyStart).join("")));
	else if(scheme == 'quoted-printable') content = parse_quoted_printable(data.slice(bodyStart));
	else throw new Error("Unsupported Content-Transfer-Encoding " + encoding);

	var entry = cfb_add(cfb, location.slice(root.length), content, {unsafe: true});
	if(mimetype) entry.ctype = mimetype;
}
118+
119+
/*
Parse a MIME aggregate document (MAD / MHT) into a container object.

- file: raw bytes of the document; must start with "MIME-Version:" (any case)
- options.root: optional prefix stripped from every Content-Location; when not
  given (or when a location does not start with it) the longest common
  directory prefix of all Content-Location values is used

Throws "Unsupported MAD header" when the signature is missing and
"MAD cannot find boundary" when no quoted boundary parameter is present in the
top-level headers. Each part found between boundary lines is handed to
parse_mime.
*/
function parse_mad(file/*:RawBytes*/, options/*:CFBReadOpts*/)/*:CFBContainer*/ {
	if(a2s(file.slice(0,13)).toLowerCase() != "mime-version:") throw new Error("Unsupported MAD header");
	var root = (options && options.root || "");
	// $FlowIgnore
	var data = (has_buf && Buffer.isBuffer(file) ? file.toString("binary") : a2s(file)).split("\r\n");
	var di = 0, row = "";

	/* if root is not specified, scan for the common prefix */
	for(di = 0; di < data.length; ++di) {
		row = data[di];
		if(!/^Content-Location:/i.test(row)) continue;
		/* take the header value (text after the first colon) so the prefix matches what parse_mime strips, whatever the URL scheme */
		row = row.slice(row.indexOf(":") + 1).trim();
		if(!root) root = row.slice(0, row.lastIndexOf("/") + 1);
		if(row.slice(0, root.length) == root) continue;
		/* shrink the root one directory at a time until it is a prefix of this location */
		while(root.length > 0) {
			root = root.slice(0, root.length - 1);
			root = root.slice(0, root.lastIndexOf("/") + 1);
			if(row.slice(0,root.length) == root) break;
		}
	}

	/* the boundary parameter may be preceded by other top-level headers or sit on a folded line: scan up to the first blank line */
	var mboundary = null;
	for(di = 1; di < data.length && data[di] !== "" && !mboundary; ++di) mboundary = data[di].match(/boundary="(.*?)"/);
	if(!mboundary) throw new Error("MAD cannot find boundary");
	var boundary = "--" + (mboundary[1] || "");

	var FileIndex/*:CFBFileIndex*/ = [], FullPaths/*:Array<string>*/ = [];
	var o = {
		FileIndex: FileIndex,
		FullPaths: FullPaths
	};
	init_cfb(o);
	var start_di, fcnt = 0;
	for(di = 0; di < data.length; ++di) {
		var line = data[di];
		if(line !== boundary && line !== boundary + "--") continue;
		/* the first boundary only opens a part; later ones close the part started at start_di */
		if(fcnt++) parse_mime(o, data.slice(start_di, di), root);
		start_di = di;
	}
	return o;
}
159+
160+
/*
Serialize a container as a MIME aggregate document (multipart/related) string.

- cfb: container; entry 0 is treated as the root and its path is stripped from
  every other path
- options.boundary: boundary label (default "SheetJS"), prefixed with "----="
- options.root: prefix for Content-Location (default "file:///C:/SheetJS/")

Entries without size or content, and the "\u0001Sh33tJ5" entry, are skipped.
Each remaining entry is written as quoted-printable when at least 4/5 of its
first 1024 characters are printable ASCII, and as base64 otherwise.
*/
function write_mad(cfb/*:CFBContainer*/, options/*:CFBWriteOpts*/)/*:string*/ {
	var opts = options || {};
	var boundary = opts.boundary || "SheetJS";
	boundary = '------=' + boundary;

	/* delimiter lines carry two extra leading dashes compared to the declared boundary */
	var out = [
		'MIME-Version: 1.0',
		'Content-Type: multipart/related; boundary="' + boundary.slice(2) + '"',
		'',
		'',
		''
	];

	var root = cfb.FullPaths[0], fp = root, fi = cfb.FileIndex[0];
	for(var i = 1; i < cfb.FullPaths.length; ++i) {
		fp = cfb.FullPaths[i].slice(root.length);
		fi = cfb.FileIndex[i];
		if(!fi.size || !fi.content || fp == "\u0001Sh33tJ5") continue;

		/* Normalize filename */
		fp = fp.replace(/[\x00-\x08\x0B\x0C\x0E-\x1F\x7E-\xFF]/g, function(c) {
			return "_x" + c.charCodeAt(0).toString(16) + "_";
		}).replace(/[\u0080-\uFFFF]/g, function(u) {
			return "_u" + u.charCodeAt(0).toString(16) + "_";
		});

		/* Extract content as binary string */
		var ca = fi.content;
		// $FlowIgnore
		var cstr = has_buf && Buffer.isBuffer(ca) ? ca.toString("binary") : a2s(ca);

		/* 4/5 of first 1024 chars ascii -> quoted printable, else base64 */
		var dispcnt = 0, L = Math.min(1024, cstr.length), cc = 0;
		/* strictly less than L: the sample is exactly the first L characters */
		for(var csl = 0; csl < L; ++csl) if((cc=cstr.charCodeAt(csl)) >= 0x20 && cc < 0x80) ++dispcnt;
		var qp = dispcnt >= L * 4 / 5;

		out.push(boundary);
		out.push('Content-Location: ' + (opts.root || 'file:///C:/SheetJS/') + fp);
		out.push('Content-Transfer-Encoding: ' + (qp ? 'quoted-printable' : 'base64'));
		out.push('Content-Type: ' + get_content_type(fi, fp));
		out.push('');

		out.push(qp ? write_quoted_printable(cstr) : write_base64_76(cstr));
	}
	out.push(boundary + '--\r\n');
	return out.join("\r\n");
}

0 commit comments

Comments
 (0)