Skip to content

Commit 0d6e172

Browse files
authored
feat: Added useRawBinaryStrings option to Decoder to allow override of default UTF-8 behaviour (#3)
1 parent 1007830 commit 0d6e172

File tree

3 files changed

+88
-14
lines changed

3 files changed

+88
-14
lines changed

README.md

+15-10
Original file line numberDiff line numberDiff line change
@@ -115,13 +115,16 @@ console.log(buffer);
115115
| extensionCodec | ExtensionCodec | `ExtensionCodec.defaultCodec` |
116116
| context | user-defined | - |
117117
| forceBigIntToInt64 | boolean | false |
118+
| useRawBinaryStrings | boolean | false |
118119
| maxDepth | number | `100` |
119120
| initialBufferSize | number | `2048` |
120121
| sortKeys | boolean | false |
121122
| forceFloat32 | boolean | false |
122123
| forceIntegerToFloat | boolean | false |
123124
| ignoreUndefined | boolean | false |
124125

126+
`useRawBinaryStrings` is a decoder option: to skip UTF-8 decoding of strings, set it to `true` when decoding. In this case, string values are decoded into `Uint8Array`; map keys are still decoded as strings.
127+
125128
### `decode(buffer: ArrayLike<number> | BufferSource, options?: DecoderOptions): unknown`
126129

127130
It decodes `buffer` that includes a MessagePack-encoded object, and returns the decoded object typed `unknown`.
@@ -522,18 +525,19 @@ The mapping of integers varies on the setting of `intMode`.
522525
| number (53-bit int) | int family | number or bigint (\*2) |
523526
| number (64-bit float) | float family | number (64-bit float) |
524527
| bigint | int family | number or bigint (\*2) |
525-
| string | str family | string |
526-
| ArrayBufferView | bin family | Uint8Array (\*3) |
528+
| string | str family | string (\*3) |
529+
| ArrayBufferView | bin family | Uint8Array (\*4) |
527530
| Array | array family | Array |
528-
| Object | map family | Object (\*4) |
529-
| Date | timestamp ext family | Date (\*5) |
531+
| Object | map family | Object (\*5) |
532+
| Date | timestamp ext family | Date (\*6) |
530533
| bigint | int family | bigint |
531534

532-
- \*1 Both `null` and `undefined` are mapped to `nil` (`0xC0`) type, and are decoded into `null`
533-
- \*2 MessagePack ints are decoded as either numbers or bigints depending on the [IntMode](#intmode) used during decoding.
534-
- \*3 Any `ArrayBufferView`s including NodeJS's `Buffer` are mapped to `bin` family, and are decoded into `Uint8Array`
535-
- \*4 In handling `Object`, it is regarded as `Record<string, unknown>` in terms of TypeScript
536-
- \*5 MessagePack timestamps may have nanoseconds, which will lost when it is decoded into JavaScript `Date`. This behavior can be overridden by registering `-1` for the extension codec.
535+
* \*1 Both `null` and `undefined` are mapped to `nil` (`0xC0`) type, and are decoded into `null`
536+
* \*2 MessagePack ints are decoded as either numbers or bigints depending on the [IntMode](#intmode) used during decoding.
537+
* \*3 If you'd like to skip UTF-8 decoding of strings, set `useRawBinaryStrings: true`. In this case, strings are decoded into `Uint8Array`.
538+
* \*4 Any `ArrayBufferView`s including NodeJS's `Buffer` are mapped to `bin` family, and are decoded into `Uint8Array`
539+
* \*5 In handling `Object`, it is regarded as `Record<string, unknown>` in terms of TypeScript
540+
* \*6 MessagePack timestamps may have nanoseconds, which will be lost when they are decoded into JavaScript `Date`. This behavior can be overridden by registering `-1` for the extension codec.
537541

538542
If you set `useBigInt64: true`, the following mapping is used:
539543

@@ -550,8 +554,9 @@ If you set `useBigInt64: true`, the following mapping is used:
550554
| Object | map family | Object |
551555
| Date | timestamp ext family | Date |
552556

553-
- \*5 If the bigint is larger than the max value of uint64 or smaller than the min value of int64, then the behavior is undefined.
557+
* \*6 If the bigint is larger than the max value of uint64 or smaller than the min value of int64, then the behavior is undefined.
554558

559+
* \*7 If the bigint is larger than the max value of uint64 or smaller than the min value of int64, then the behavior is undefined.
555560
## Prerequisites
556561

557562
This is a universal JavaScript library that supports major browsers and NodeJS.

src/Decoder.ts

+24-4
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,17 @@ export type DecoderOptions<ContextType = undefined> = Readonly<
2727
*/
2828
intMode?: IntMode;
2929

30+
/**
31+
* By default, string values will be decoded as UTF-8 strings. However, if this option is true,
32+
* string values will be returned as Uint8Arrays without additional decoding.
33+
*
34+
* This is useful if the strings may contain invalid UTF-8 sequences.
35+
*
36+
* Note that this option only applies to string values, not map keys. Additionally, when
37+
* enabled, raw string length is limited by the maxBinLength option.
38+
*/
39+
useRawBinaryStrings: boolean;
40+
3041
/**
3142
* Maximum string length.
3243
*
@@ -202,6 +213,7 @@ export class Decoder<ContextType = undefined> {
202213
private readonly extensionCodec: ExtensionCodecType<ContextType>;
203214
private readonly context: ContextType;
204215
private readonly intMode: IntMode;
216+
private readonly useRawBinaryStrings: boolean;
205217
private readonly maxStrLength: number;
206218
private readonly maxBinLength: number;
207219
private readonly maxArrayLength: number;
@@ -222,6 +234,7 @@ export class Decoder<ContextType = undefined> {
222234
this.context = (options as { context: ContextType } | undefined)?.context as ContextType; // needs a type assertion because EncoderOptions has no context property when ContextType is undefined
223235

224236
this.intMode = options?.intMode ?? (options?.useBigInt64 ? IntMode.AS_ENCODED : IntMode.UNSAFE_NUMBER);
237+
this.useRawBinaryStrings = options?.useRawBinaryStrings ?? false;
225238
this.maxStrLength = options?.maxStrLength ?? UINT32_MAX;
226239
this.maxBinLength = options?.maxBinLength ?? UINT32_MAX;
227240
this.maxArrayLength = options?.maxArrayLength ?? UINT32_MAX;
@@ -406,7 +419,7 @@ export class Decoder<ContextType = undefined> {
406419
} else {
407420
// fixstr (101x xxxx) 0xa0 - 0xbf
408421
const byteLength = headByte - 0xa0;
409-
object = this.decodeUtf8String(byteLength, 0);
422+
object = this.decodeString(byteLength, 0);
410423
}
411424
} else if (headByte === 0xc0) {
412425
// nil
@@ -450,15 +463,15 @@ export class Decoder<ContextType = undefined> {
450463
} else if (headByte === 0xd9) {
451464
// str 8
452465
const byteLength = this.lookU8();
453-
object = this.decodeUtf8String(byteLength, 1);
466+
object = this.decodeString(byteLength, 1);
454467
} else if (headByte === 0xda) {
455468
// str 16
456469
const byteLength = this.lookU16();
457-
object = this.decodeUtf8String(byteLength, 2);
470+
object = this.decodeString(byteLength, 2);
458471
} else if (headByte === 0xdb) {
459472
// str 32
460473
const byteLength = this.lookU32();
461-
object = this.decodeUtf8String(byteLength, 4);
474+
object = this.decodeString(byteLength, 4);
462475
} else if (headByte === 0xdc) {
463476
// array 16
464477
const size = this.readU16();
@@ -636,6 +649,13 @@ export class Decoder<ContextType = undefined> {
636649
this.stack.pushArrayState(size);
637650
}
638651

652+
/**
 * Decodes a str-family payload either as a UTF-8 string or as raw bytes.
 *
 * The UTF-8 path (`decodeUtf8String`) is taken when `useRawBinaryStrings` is
 * disabled, or when `stateIsMapKey()` reports true, so map keys remain strings
 * even with the option enabled. Otherwise the payload is handed to
 * `decodeBinary` and returned as a `Uint8Array`.
 *
 * NOTE(review): per the `useRawBinaryStrings` option documentation, the raw path
 * is limited by `maxBinLength` rather than `maxStrLength`; `decodeBinary` is not
 * visible here, so confirm against its implementation.
 *
 * @param byteLength - Length of the string payload in bytes, as read from the header.
 * @param headerOffset - Number of length bytes following the head byte (0 for fixstr, 1/2/4 for str 8/16/32).
 * @returns The decoded string, or the raw bytes when raw decoding applies.
 */
private decodeString(byteLength: number, headerOffset: number): string | Uint8Array {
653+
if (!this.useRawBinaryStrings || this.stateIsMapKey()) {
654+
return this.decodeUtf8String(byteLength, headerOffset);
655+
}
656+
return this.decodeBinary(byteLength, headerOffset);
657+
}
658+
639659
private decodeUtf8String(byteLength: number, headerOffset: number): string {
640660
if (byteLength > this.maxStrLength) {
641661
throw new DecodeError(

test/decode-raw-strings.test.ts

+49
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,49 @@
1+
import assert from "assert";
2+
import { encode, decode } from "../src";
3+
import type { DecoderOptions } from "../src";
4+
5+
describe("decode with useRawBinaryStrings specified", () => {
6+
const options = { useRawBinaryStrings: true } satisfies DecoderOptions;
7+
8+
it("decodes string as binary", () => {
9+
const actual = decode(encode("foo"), options);
10+
const expected = Uint8Array.from([0x66, 0x6f, 0x6f]);
11+
assert.deepStrictEqual(actual, expected);
12+
});
13+
14+
it("decodes invalid UTF-8 string as binary", () => {
15+
const invalidUtf8String = Uint8Array.from([
16+
61, 180, 118, 220, 39, 166, 43, 68, 219, 116, 105, 84, 121, 46, 122, 136, 233, 221, 15, 174, 247, 19, 50, 176,
17+
184, 221, 66, 188, 171, 36, 135, 121,
18+
]);
19+
// Header is str 8 (0xd9 = 217) followed by the byte length (32). The previous
// header, 0xc4 (196), is bin 8, which decodes to Uint8Array regardless of
// useRawBinaryStrings and therefore never exercised the raw-string path.
const encoded = Uint8Array.from([
20+
217, 32, 61, 180, 118, 220, 39, 166, 43, 68, 219, 116, 105, 84, 121, 46, 122, 136, 233, 221, 15, 174, 247, 19, 50,
21+
176, 184, 221, 66, 188, 171, 36, 135, 121,
22+
]);
23+
24+
const actual = decode(encoded, options);
25+
assert.deepStrictEqual(actual, invalidUtf8String);
26+
});
27+
28+
it("decodes object keys as strings", () => {
29+
const actual = decode(encode({ key: "foo" }), options);
30+
const expected = { key: Uint8Array.from([0x66, 0x6f, 0x6f]) };
31+
assert.deepStrictEqual(actual, expected);
32+
});
33+
34+
it("ignores maxStrLength", () => {
35+
const lengthLimitedOptions = { ...options, maxStrLength: 1 } satisfies DecoderOptions;
36+
37+
const actual = decode(encode("foo"), lengthLimitedOptions);
38+
const expected = Uint8Array.from([0x66, 0x6f, 0x6f]);
39+
assert.deepStrictEqual(actual, expected);
40+
});
41+
42+
it("respects maxBinLength", () => {
43+
const lengthLimitedOptions = { ...options, maxBinLength: 1 } satisfies DecoderOptions;
44+
45+
assert.throws(() => {
46+
decode(encode("foo"), lengthLimitedOptions);
47+
}, /max length exceeded/i);
48+
});
49+
});

0 commit comments

Comments
 (0)