Skip to content
This repository was archived by the owner on Jan 13, 2025. It is now read-only.

Commit eaab3c3

Browse files
committed
UTF8: add to support string storage
Allow storage of a UTF8 string without terminating NUL, in a blob that may be length-constrained. Closes pabigot#21.
1 parent 83017cc commit eaab3c3

File tree

3 files changed

+174
-2
lines changed

3 files changed

+174
-2
lines changed

CHANGELOG.md

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,8 @@
22

33
## [Unreleased]
44

5+
* **API** Add [UTF8][doc:UTF8] to encode UTF-8 strings in a possibly
6+
bounded buffer, resolving [issue #21][issue#21].
57
* **API** Allow the layout parameter of
68
a [VariantLayout][doc:VariantLayout] to be omitted in cases where no
79
data beyond the discriminator is required,
@@ -170,6 +172,7 @@
170172
[doc:Union]: http://pabigot.github.io/buffer-layout/module-Layout-Union.html
171173
[doc:Union.getSourceVariant]: http://pabigot.github.io/buffer-layout/module-Layout-Union.html#getSourceVariant
172174
[doc:UnionDiscriminator]: http://pabigot.github.io/buffer-layout/module-Layout-UnionDiscriminator.html
175+
[doc:UTF8]: http://pabigot.github.io/buffer-layout/module-Layout-UTF8.html
173176
[doc:VariantLayout]: http://pabigot.github.io/buffer-layout/module-Layout-VariantLayout.html
174177
[issue#1]: https://github.com/pabigot/buffer-layout/issues/1
175178
[issue#2]: https://github.com/pabigot/buffer-layout/issues/2
@@ -189,6 +192,7 @@
189192
[issue#17]: https://github.com/pabigot/buffer-layout/issues/17
190193
[issue#19]: https://github.com/pabigot/buffer-layout/issues/19
191194
[issue#20]: https://github.com/pabigot/buffer-layout/issues/20
195+
[issue#21]: https://github.com/pabigot/buffer-layout/issues/21
192196
[ci:travis]: https://travis-ci.org/pabigot/buffer-layout
193197
[ci:coveralls]: https://coveralls.io/github/pabigot/buffer-layout
194198
[node:issue#3992]: https://github.com/nodejs/node/issues/3992

lib/Layout.js

Lines changed: 99 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1114,8 +1114,7 @@ class Sequence extends Layout {
11141114
}
11151115
const elo = this.elementLayout;
11161116
const span = src.reduce((span, v) => {
1117-
elo.encode(v, b, offset + span);
1118-
return span + elo.getSpan(b, offset + span);
1117+
return span + elo.encode(v, b, offset + span);
11191118
}, 0);
11201119
if (this.count instanceof ExternalLayout) {
11211120
this.count.encode(src.length, b, offset);
@@ -2406,6 +2405,100 @@ class CString extends Layout {
24062405
}
24072406
}
24082407

2408+
/**
 * Contain a UTF8 string with implicit length.
 *
 * *Factory*: {@link module:Layout.utf8|utf8}
 *
 * **NOTE** Because the length is implicit in the size of the buffer
 * this layout should be used only in isolation, or in a situation
 * where the length can be expressed by operating on a slice of the
 * containing buffer.
 *
 * @param {Number} [maxSpan] - the maximum length, in bytes, allowed
 * for encoded string content.  If not provided there is no bound on
 * the allowed content.  May be omitted entirely, in which case a
 * lone string argument is taken as `property`.
 *
 * @param {String} [property] - initializer for {@link
 * Layout#property|property}.
 *
 * @throws {TypeError} if `maxSpan` is provided but is not an integer.
 *
 * @augments {Layout}
 */
class UTF8 extends Layout {
  constructor(maxSpan, property) {
    /* Support the single-argument form `new UTF8(property)`. */
    if (('string' === typeof maxSpan)
        && (undefined === property)) {
      property = maxSpan;
      maxSpan = undefined;
    }
    if (undefined === maxSpan) {
      maxSpan = -1;
    } else if (!Number.isInteger(maxSpan)) {
      throw new TypeError('maxSpan must be an integer');
    }

    /* The span is negative because it depends on the buffer that is
     * being decoded or encoded. */
    super(-1, property);

    /** The maximum span of the layout in bytes.
     *
     * Positive values are generally expected.  Zero is abnormal.
     * Attempts to encode or decode a value that exceeds this length
     * will throw a `RangeError`.
     *
     * A negative value indicates that there is no bound on the length
     * of the content. */
    this.maxSpan = maxSpan;
  }

  /**
   * Reject a span that exceeds a non-negative {@link UTF8#maxSpan}.
   *
   * @param {Number} span - the number of bytes of text content.
   *
   * @throws {RangeError} if `maxSpan` is non-negative and less than
   * `span`.
   *
   * @private
   */
  _checkSpan(span) {
    if ((0 <= this.maxSpan)
        && (this.maxSpan < span)) {
      throw new RangeError('text length exceeds maxSpan');
    }
  }

  /**
   * The span is everything from `offset` to the end of the buffer.
   *
   * @param {Buffer} b - the buffer holding the text.
   *
   * @param {Number} [offset] - where the text starts in `b`;
   * defaults to zero.
   *
   * @returns {Number} `b.length - offset`.
   *
   * @throws {TypeError} if `b` is not a Buffer.
   *
   * @override
   */
  getSpan(b, offset) {
    if (!(b instanceof Buffer)) {
      throw new TypeError('b must be a Buffer');
    }
    if (undefined === offset) {
      offset = 0;
    }
    return b.length - offset;
  }

  /**
   * Decode the remainder of the buffer as UTF-8 text.
   *
   * @param {Buffer} b - the buffer holding the text.
   *
   * @param {Number} [offset] - where the text starts in `b`;
   * defaults to zero.
   *
   * @param {Object} [dest] - unused by this layout.
   *
   * @returns {String} the text from `offset` to the end of `b`.
   *
   * @throws {TypeError} if `b` is not a Buffer.
   *
   * @throws {RangeError} if the content is longer than a non-negative
   * `maxSpan`.
   *
   * @override
   */
  decode(b, offset, dest) {
    if (undefined === offset) {
      offset = 0;
    }
    const span = this.getSpan(b, offset);
    this._checkSpan(span);
    return b.slice(offset, offset + span).toString('utf-8');
  }

  /**
   * Encode text as UTF-8 into the buffer, without a terminating NUL.
   *
   * @param {*} src - the value to store.  Anything that is not
   * already a string is converted with its `toString()` method.
   *
   * @param {Buffer} b - the buffer to write into.
   *
   * @param {Number} [offset] - where to start writing in `b`;
   * defaults to zero.
   *
   * @returns {Number} the number of bytes written.
   *
   * @throws {RangeError} if the encoded text is longer than a
   * non-negative `maxSpan`, or would run past the end of `b`.
   *
   * @override
   */
  encode(src, b, offset) {
    if (undefined === offset) {
      offset = 0;
    }
    /* Must force this to a string: Buffer.from() rejects a numeric
     * first argument (and the legacy Buffer constructor would have
     * allocated a buffer of length src instead of encoding it). */
    if ('string' !== typeof src) {
      src = src.toString();
    }
    /* Buffer.from replaces the deprecated `new Buffer(string)`. */
    const srcb = Buffer.from(src, 'utf8');
    const span = srcb.length;
    this._checkSpan(span);
    if ((offset + span) > b.length) {
      throw new RangeError('encoding overruns Buffer');
    }
    srcb.copy(b, offset);
    return span;
  }
}
2501+
24092502
/**
24102503
* Contain a constant value.
24112504
*
@@ -2475,6 +2568,7 @@ exports.BitField = BitField;
24752568
exports.Boolean = Boolean;
24762569
exports.Blob = Blob;
24772570
exports.CString = CString;
2571+
exports.UTF8 = UTF8;
24782572
exports.Constant = Constant;
24792573

24802574
/** Factory for {@link GreedyCount}. */
@@ -2620,5 +2714,8 @@ exports.blob = ((length, property) => new Blob(length, property));
26202714
/** Factory for {@link CString} values. */
26212715
exports.cstr = (property => new CString(property));
26222716

2717+
/** Factory for {@link UTF8} values.
 *
 * Forwards `maxSpan` and `property` unchanged to the {@link UTF8}
 * constructor and returns the new instance. */
2718+
exports.utf8 = ((maxSpan, property) => new UTF8(maxSpan, property));
2719+
26232720
/** Factory for {@link Constant} values. */
26242721
exports.const = ((value, property) => new Constant(value, property));

test/LayoutTest.js

Lines changed: 71 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1923,6 +1923,77 @@ suite('Layout', function() {
19231923
assert.equal(Buffer.from('68690075006300', 'hex').compare(b), 0);
19241924
});
19251925
});
1926+
suite('UTF8', function() {
1927+
test('ctor', function() {
1928+
const cst = lo.utf8();
1929+
assert(0 > cst.span);
1930+
assert.strictEqual(cst.maxSpan, -1);
1931+
});
1932+
test('ctor with maxSpan', function() {
1933+
const cst = lo.utf8(5);
1934+
assert.strictEqual(cst.maxSpan, 5);
1935+
});
1936+
test('ctor with invalid maxSpan', function() {
1937+
assert.throws(() => new lo.UTF8(23.1), TypeError);
1938+
});
1939+
test('#getSpan', function() {
1940+
const cst = new lo.UTF8();
1941+
assert.throws(() => cst.getSpan(), TypeError);
1942+
assert.equal(cst.getSpan(Buffer.from('00', 'hex')), 1);
1943+
assert.equal(cst.getSpan(Buffer.from('4100', 'hex')), 2);
1944+
assert.equal(cst.getSpan(Buffer.from('4100', 'hex'), 1), 1);
1945+
assert.equal(cst.getSpan(Buffer.from('4142', 'hex')), 2);
1946+
});
1947+
test('#decode', function() {
1948+
const cst = new lo.UTF8(3);
1949+
assert.equal(cst.decode(Buffer.from('00', 'hex')), '\x00');
1950+
assert.equal(cst.decode(Buffer.from('4100', 'hex')), 'A\x00');
1951+
assert.equal(cst.decode(Buffer.from('4100', 'hex'), 1), '\x00');
1952+
assert.equal(cst.decode(Buffer.from('4142', 'hex')), 'AB');
1953+
assert.throws(() => cst.decode(Buffer.from('four', 'utf8')),
1954+
RangeError);
1955+
});
1956+
test('#encode', function() {
1957+
const cst = new lo.UTF8();
1958+
const b = Buffer.alloc(3);
1959+
b.fill(0xFF);
1960+
assert.equal(cst.encode('', b), 0);
1961+
assert.equal(Buffer.from('ffffff', 'hex').compare(b), 0);
1962+
assert.equal(cst.encode('A', b), 1);
1963+
assert.equal(Buffer.from('41ffff', 'hex').compare(b), 0);
1964+
assert.equal(cst.encode('B', b, 1), 1);
1965+
assert.equal(Buffer.from('4142ff', 'hex').compare(b), 0);
1966+
assert.equal(cst.encode(5, b), 1);
1967+
assert.equal(Buffer.from('3542ff', 'hex').compare(b), 0);
1968+
assert.equal(cst.encode('abc', b), 3);
1969+
assert.equal(Buffer.from('616263', 'hex').compare(b), 0);
1970+
assert.throws(() => cst.encode('four', b), RangeError);
1971+
});
1972+
test('#encode with maxSpan', function() {
1973+
const cst = new lo.UTF8(2);
1974+
const b = Buffer.alloc(3);
1975+
b.fill(0xFF);
1976+
assert.throws(() => cst.encode('abc', b), RangeError);
1977+
});
1978+
test('in struct', function() {
1979+
const st = lo.struct([lo.utf8('k'),
1980+
lo.utf8('v')]);
1981+
const b = Buffer.from('6162323334', 'hex');
1982+
assert.throws(() => st.getSpan(), RangeError);
1983+
assert.equal(st.fields[0].getSpan(b), b.length);
1984+
assert.equal(st.fields[1].getSpan(b, 2), b.length - 2);
1985+
assert.equal(st.getSpan(b), b.length);
1986+
assert.deepEqual(st.decode(b), {k: 'ab234', v: ''});
1987+
});
1988+
test('in seq', function() {
1989+
const seq = lo.seq(lo.utf8(), 3);
1990+
const b = Buffer.from('4162633435', 'hex');
1991+
assert.deepEqual(seq.decode(b), ['Abc45', '', '']);
1992+
b.fill(0xFF);
1993+
assert.equal(seq.encode(['hi', 'u', 'c'], b), 2 + 1 + 1);
1994+
assert.equal(Buffer.from('68697563ff', 'hex').compare(b), 0);
1995+
});
1996+
});
19261997
suite('Constant', function() {
19271998
test('ctor', function() {
19281999
const c = new lo.Constant('value', 'p');

0 commit comments

Comments
 (0)