Skip to content

Commit 99d86f0

Browse files
committed
version bump 0.6.0: case insensitive find
1 parent 369f05a commit 99d86f0

File tree

6 files changed

+109
-5
lines changed

6 files changed

+109
-5
lines changed

.jshintrc

+4
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
{
2+
"bitwise": false,
3+
"curly": false
4+
}

APACHE.LICENSE LICENSE

File renamed without changes.

README.md

+74-1
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@ This is a Pure-JS implementation of MS-CFB: Compound File Binary File Format, a
44
format used in many Microsoft file types (such as XLS, DOC, and other Microsoft
55
Office file types).
66

7-
# Installation and Usage
7+
# Utility Installation and Usage
88

99
The package is available on NPM:
1010

@@ -18,6 +18,79 @@ files that line up with the tree-based structure of the storage. Metadata
1818
such as the red-black tree are discarded (and in the future, new CFB containers
1919
will exclusively use black nodes)
2020

21+
# Library Installation and Usage
22+
23+
In the browser:
24+
25+
<script src="cfb.js" type="text/javascript"></script>
26+
27+
In node:
28+
29+
var CFB = require('cfb');
30+
31+
For example, to get the Workbook content from an XLS file:
32+
33+
var cfb = CFB.read(filename, {type: 'file'});
34+
var has_vba = cfb.Directory['Workbook']
35+
36+
## API
37+
38+
The CFB object exposes the following methods and properties:
39+
40+
`CFB.parse(blob)` takes a nodejs Buffer or an array of bytes and returns a
41+
parsed representation of the data.
42+
43+
`CFB.read(blob, options)` wraps `parse`. `options.type` controls the behavior:
44+
45+
- `file`: `blob` should be a file name
46+
- `base64`: `blob` should be a base64 string
47+
- `binary`: `blob` should be a binary string
48+
49+
## Container Object Description
50+
51+
The object returned by `parse` and `read` can be found in the source (`rval`).
52+
It has the following properties and methods:
53+
54+
- `.find(path)` performs a case-insensitive match for the path (or file name, if
55+
there are no slashes) and returns an entry object (described later) or null if
56+
not found
57+
58+
- `.FullPaths` is an array of the names of all of the streams (files) and
59+
storages (directories) in the container. The paths are properly prefixed from
60+
the root entry (so the entries are unique)
61+
62+
- `.FullPathDir` is an object whose keys are entries in `.FullPaths` and whose
63+
values are objects with metadata and content (described below)
64+
65+
- `.FileIndex` is an array of the objects from `.FullPathDir`, in the same order
66+
as `.FullPaths`.
67+
68+
- `.raw` contains the raw header and sectors
69+
70+
- `.Paths` is an array of the names of all of the streams (files) and storages
71+
(directories) in the container. There is no disambiguation in the case of
72+
streams with the same name.
73+
74+
- `.Directory` is an object whose keys are entries in `.Paths` and whose values
75+
are objects with metadata and content. Since collisions are not properly
76+
handled here, `.FullPathDir` is the better option for new projects.
77+
78+
## Entry Object Description
79+
80+
The entry objects are available from `FullPathDir`, `FileIndex`, and `Directory`
81+
elements of the container object.
82+
83+
- `.name` is the (case sensitive) internal name
84+
- `.type` is the type (`stream` for files, `storage` for dirs, `root` for root)
85+
- `.content` is a Buffer/Array with the raw content
86+
- `.ct`/`.mt` are the creation and modification time (if provided in file)
87+
88+
# Notes
89+
90+
Case comparison has not been verified for non-ASCII characters.
91+
92+
Writing is not supported. It is in the works, but it has not yet been released.
93+
2194
# License
2295

2396
This implementation is covered under the Apache 2.0 license. It complies with the

cfb.js

+13-1
Original file line numberDiff line numberDiff line change
@@ -355,6 +355,7 @@ function read_directory(idx) {
355355
}
356356
read_directory(dir_start);
357357

358+
/* [MS-CFB] 2.6.4 Red-Black Tree */
358359
function build_full_paths(Dir, pathobj, paths, patharr) {
359360
var i;
360361
var dad = new Array(patharr.length);
@@ -389,13 +390,24 @@ build_full_paths(FileIndex, FullPathDir, FullPaths, Paths);
389390
var root_name = Paths.shift();
390391
Paths.root = root_name;
391392

393+
/* [MS-CFB] 2.6.4 (Unicode 3.0.1 case conversion) */
394+
/**
 * Case-insensitive lookup of an entry by full path or by bare name.
 *
 * A leading "/" is treated as "relative to the root entry": the root name is
 * prepended before matching. If the (possibly rewritten) path contains a
 * slash it is matched against `FullPaths` and the result is taken from
 * `FileIndex` at the same position; otherwise it is matched against `Paths`
 * and the result is taken from `files`. The first match wins.
 *
 * @param {string} path full path ("/Workbook", "Root Entry/Workbook") or bare name ("Workbook")
 * @returns {Object|null} the matching entry, or null when nothing matches
 */
function find_path(path) {
	/* a non-string can never match a name: report "not found" rather than throwing */
	if(typeof path !== "string") return null;
	/* charAt rather than path[0]: string indexing is unavailable in ES3 browsers */
	if(path.charAt(0) === "/") path = root_name + path;
	var is_full = path.indexOf("/") !== -1;
	var names = is_full ? FullPaths : Paths;
	var target = path.toUpperCase();
	/* scan in order and stop at the first hit; avoids building an upper-cased copy of the list */
	for(var i = 0; i < names.length; ++i) {
		if(names[i].toUpperCase() !== target) continue;
		return is_full ? FileIndex[i] : files[Paths[i]];
	}
	return null;
}
402+
392403
var rval = {
393404
raw: {header: header, sectors: sectors},
394405
Paths: Paths,
395406
FileIndex: FileIndex,
396407
FullPaths: FullPaths,
397408
FullPathDir: FullPathDir,
398-
Directory: files
409+
Directory: files,
410+
find: find_path
399411
};
400412

401413
return rval;

package.json

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
{
22
"name": "cfb",
3-
"version": "0.5.0",
3+
"version": "0.6.0",
44
"author": "Niggler",
55
"description": "Compound File Binary File Format extractor",
66
"keywords": [ "cfb", "compression", "office" ],

test.js

+17-2
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,26 @@
1+
/* vim: set ts=2: */
12
var CFB;
23
var fs = require('fs');
34
describe('source', function() { it('should load', function() { CFB = require('./'); }); });
5+
46
var files = fs.readdirSync('test_files').filter(function(x){return x.substr(-4)==".xls";});
5-
files.forEach(function(x) {
6-
describe(x, function() {
7+
8+
/**
 * Assert that a parsed container exposes its workbook stream through `find`.
 *
 * This helper is called from inside a running test, so it performs the checks
 * directly and throws on failure; registering new suites or tests at that
 * point is not supported by the test runner, and the failure should belong to
 * the test that parsed the file.
 *
 * @param {string} x   test file name, used only in the error message
 * @param {Object} cfb parsed container exposing `find(path)`
 * @throws {Error} when neither 'Workbook' nor 'Book' is found, by name or by absolute path
 */
function parsetest(x, cfb) {
	/* relative lookup: bare stream name */
	if(!cfb.find('Workbook') && !cfb.find('Book')) throw new Error("Cannot find workbook for " + x);
	/* absolute lookup: leading slash, resolved from the root entry */
	if(!cfb.find('/Workbook') && !cfb.find('/Book')) throw new Error("Cannot find workbook for " + x);
}
18+
19+
describe('should parse test files', function() {
20+
files.forEach(function(x) {
721
it('should parse ' + x, function() {
822
var cfb = CFB.read('./test_files/' + x, {type: "file"});
23+
parsetest(x, cfb);
924
});
1025
});
1126
});

0 commit comments

Comments
 (0)