Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add JS Map support for encoding & decoding #5

Merged
merged 4 commits into from
Mar 12, 2024
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
106 changes: 86 additions & 20 deletions src/Decoder.ts
Original file line number Diff line number Diff line change
Expand Up @@ -33,10 +33,49 @@ export type DecoderOptions<ContextType = undefined> = Readonly<
*
* This is useful if the strings may contain invalid UTF-8 sequences.
*
* Note that this option only applies to string values, not map keys. Additionally, when
* enabled, raw string length is limited by the maxBinLength option.
* When enabled, raw string length is limited by the maxBinLength option.
*
* Note that this option only applies to string values, not map keys. See `rawBinaryStringKeys`
* for map keys.
*/
rawBinaryStringValues: boolean;

/**
* By default, map keys will be decoded as UTF-8 strings. However, if this option is true, map
* keys will be returned as Uint8Arrays without additional decoding.
*
* Requires `useMap` to be true, since plain objects do not support binary keys.
*
* When enabled, raw string length is limited by the maxBinLength option.
*
* Note that this option only applies to map keys, not string values. See `rawBinaryStringValues`
* for string values.
*/
rawBinaryStringKeys: boolean;

/**
* If true, the decoder will use the Map object to store map values. If false, it will use plain
* objects. Defaults to false.
*
* Besides the type of container, the main difference is that Map objects support a wider range
* of key types. Plain objects only support string keys, while Map objects support strings,
* numbers, bigints, and Uint8Arrays.
*/
useMap: boolean;

/**
* If true, the decoder will support decoding numbers as map keys on plain objects. Defaults to
* false.
*
* Note that any numbers used as object keys will be converted to strings, so there is a risk of
* key collision as well as the inability to re-encode the object to the same representation.
*
* This option is ignored if `useMap` is true.
*
* This is useful for backwards compatibility before `useMap` was introduced. Consider instead
* using `useMap` for new code.
*/
useRawBinaryStrings: boolean;
supportObjectNumberKeys: boolean;

/**
* Maximum string length.
Expand Down Expand Up @@ -82,18 +121,22 @@ const STATE_ARRAY = "array";
const STATE_MAP_KEY = "map_key";
const STATE_MAP_VALUE = "map_value";

type MapKeyType = string | number;
type MapKeyType = string | number | bigint | Uint8Array;

const isValidMapKeyType = (key: unknown): key is MapKeyType => {
return typeof key === "string" || typeof key === "number";
};
function isValidMapKeyType(key: unknown, useMap: boolean, supportObjectNumberKeys: boolean): key is MapKeyType {
if (useMap) {
return typeof key === "string" || typeof key === "number" || typeof key === "bigint" || key instanceof Uint8Array;
}
// Plain objects support a more limited set of key types
return typeof key === "string" || (supportObjectNumberKeys && typeof key === "number");
}

type StackMapState = {
type: typeof STATE_MAP_KEY | typeof STATE_MAP_VALUE;
size: number;
key: MapKeyType | null;
readCount: number;
map: Record<string, unknown>;
map: Record<string, unknown> | Map<MapKeyType, unknown>;
};

type StackArrayState = {
Expand All @@ -107,6 +150,8 @@ class StackPool {
private readonly stack: Array<StackState> = [];
private stackHeadPosition = -1;

constructor(private readonly useMap: boolean) {}

public get length(): number {
return this.stackHeadPosition + 1;
}
Expand All @@ -130,7 +175,7 @@ class StackPool {
state.type = STATE_MAP_KEY;
state.readCount = 0;
state.size = size;
state.map = {};
state.map = this.useMap ? new Map() : {};
}

private getUninitializedStateFromPool() {
Expand Down Expand Up @@ -213,7 +258,10 @@ export class Decoder<ContextType = undefined> {
private readonly extensionCodec: ExtensionCodecType<ContextType>;
private readonly context: ContextType;
private readonly intMode: IntMode;
private readonly useRawBinaryStrings: boolean;
private readonly rawBinaryStringValues: boolean;
private readonly rawBinaryStringKeys: boolean;
private readonly useMap: boolean;
private readonly supportObjectNumberKeys: boolean;
private readonly maxStrLength: number;
private readonly maxBinLength: number;
private readonly maxArrayLength: number;
Expand All @@ -227,20 +275,29 @@ export class Decoder<ContextType = undefined> {
private view = EMPTY_VIEW;
private bytes = EMPTY_BYTES;
private headByte = HEAD_BYTE_REQUIRED;
private readonly stack = new StackPool();
private readonly stack: StackPool;

public constructor(options?: DecoderOptions<ContextType>) {
this.extensionCodec = options?.extensionCodec ?? (ExtensionCodec.defaultCodec as ExtensionCodecType<ContextType>);
this.context = (options as { context: ContextType } | undefined)?.context as ContextType; // needs a type assertion because EncoderOptions has no context property when ContextType is undefined

this.intMode = options?.intMode ?? (options?.useBigInt64 ? IntMode.AS_ENCODED : IntMode.UNSAFE_NUMBER);
this.useRawBinaryStrings = options?.useRawBinaryStrings ?? false;
this.rawBinaryStringValues = options?.rawBinaryStringValues ?? false;
this.rawBinaryStringKeys = options?.rawBinaryStringKeys ?? false;
this.useMap = options?.useMap ?? false;
this.supportObjectNumberKeys = options?.supportObjectNumberKeys ?? false;
this.maxStrLength = options?.maxStrLength ?? UINT32_MAX;
this.maxBinLength = options?.maxBinLength ?? UINT32_MAX;
this.maxArrayLength = options?.maxArrayLength ?? UINT32_MAX;
this.maxMapLength = options?.maxMapLength ?? UINT32_MAX;
this.maxExtLength = options?.maxExtLength ?? UINT32_MAX;
this.keyDecoder = options?.keyDecoder !== undefined ? options.keyDecoder : sharedCachedKeyDecoder;

if (this.rawBinaryStringKeys && !this.useMap) {
throw new Error("rawBinaryStringKeys is only supported when useMap is true");
}

this.stack = new StackPool(this.useMap);
}

private reinitializeState() {
Expand Down Expand Up @@ -404,7 +461,7 @@ export class Decoder<ContextType = undefined> {
this.complete();
continue DECODE;
} else {
object = {};
object = this.useMap ? new Map() : {};
}
} else if (headByte < 0xa0) {
// fixarray (1001 xxxx) 0x90 - 0x9f
Expand Down Expand Up @@ -571,10 +628,15 @@ export class Decoder<ContextType = undefined> {
continue DECODE;
}
} else if (state.type === STATE_MAP_KEY) {
if (!isValidMapKeyType(object)) {
throw new DecodeError("The type of key must be string or number but " + typeof object);
if (!isValidMapKeyType(object, this.useMap, this.supportObjectNumberKeys)) {
const acceptableTypes = this.useMap
? "string, number, bigint, or Uint8Array"
: this.supportObjectNumberKeys
? "string or number"
: "string";
throw new DecodeError(`The type of key must be ${acceptableTypes} but got ${typeof object}`);
}
if (object === "__proto__") {
if (!this.useMap && object === "__proto__") {
throw new DecodeError("The key __proto__ is not allowed");
}

Expand All @@ -584,7 +646,11 @@ export class Decoder<ContextType = undefined> {
} else {
// it must be `state.type === State.MAP_VALUE` here

state.map[state.key!] = object;
if (this.useMap) {
(state.map as Map<MapKeyType, unknown>).set(state.key!, object);
} else {
(state.map as Record<string, unknown>)[state.key as string] = object;
}
state.readCount++;

if (state.readCount === state.size) {
Expand Down Expand Up @@ -650,10 +716,10 @@ export class Decoder<ContextType = undefined> {
}

private decodeString(byteLength: number, headerOffset: number): string | Uint8Array {
if (!this.useRawBinaryStrings || this.stateIsMapKey()) {
return this.decodeUtf8String(byteLength, headerOffset);
if (this.stateIsMapKey() ? this.rawBinaryStringKeys : this.rawBinaryStringValues) {
return this.decodeBinary(byteLength, headerOffset);
}
return this.decodeBinary(byteLength, headerOffset);
return this.decodeUtf8String(byteLength, headerOffset);
}

private decodeUtf8String(byteLength: number, headerOffset: number): string {
Expand Down
76 changes: 66 additions & 10 deletions src/Encoder.ts
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import { utf8Count, utf8Encode } from "./utils/utf8";
import { ExtensionCodec, ExtensionCodecType } from "./ExtensionCodec";
import { setInt64, setUint64 } from "./utils/int";
import { ensureUint8Array } from "./utils/typedArrays";
import { ensureUint8Array, compareUint8Arrays } from "./utils/typedArrays";
import type { ExtData } from "./ExtData";
import type { ContextOf } from "./context";

Expand Down Expand Up @@ -41,6 +41,15 @@ export type EncoderOptions<ContextType = undefined> = Partial<
* binary is canonical and thus comparable to another encoded binary.
*
* Defaults to `false`. If enabled, it spends more time in encoding objects.
*
* If enabled, the encoder will throw an error if the NaN value is included in the keys of a
* map, since it is not comparable.
*
* If enabled and the keys of a map include multiple different types, each type will be sorted
* separately, and the order of the types will be as follows:
* 1. Numbers (including bigints)
* 2. Strings
* 3. Binary data
*/
sortKeys: boolean;

Expand Down Expand Up @@ -321,8 +330,10 @@ export class Encoder<ContextType = undefined> {
// this is here instead of in doEncode so that we can try encoding with an extension first,
// otherwise we would break existing extensions for bigints
this.encodeBigInt(object);
} else if (object instanceof Map) {
this.encodeMap(object, depth);
} else if (typeof object === "object") {
this.encodeMap(object as Record<string, unknown>, depth);
this.encodeMapObject(object as Record<string, unknown>, depth);
} else {
// symbol, function and other special object come here unless extensionCodec handles them.
throw new Error(`Unrecognized object: ${Object.prototype.toString.apply(object)}`);
Expand Down Expand Up @@ -371,25 +382,60 @@ export class Encoder<ContextType = undefined> {
}
}

private countWithoutUndefined(object: Record<string, unknown>, keys: ReadonlyArray<string>): number {
private countWithoutUndefined(map: Map<unknown, unknown>, keys: ReadonlyArray<unknown>): number {
let count = 0;

for (const key of keys) {
if (object[key] !== undefined) {
if (map.get(key) !== undefined) {
count++;
}
}

return count;
}

private encodeMap(object: Record<string, unknown>, depth: number) {
const keys = Object.keys(object);
private sortMapKeys(keys: Array<unknown>): Array<unknown> {
const numericKeys: Array<number | bigint> = [];
const stringKeys: Array<string> = [];
const binaryKeys: Array<Uint8Array> = [];
for (const key of keys) {
if (typeof key === "number") {
if (isNaN(key)) {
throw new Error("Cannot sort map keys with NaN value");
}
numericKeys.push(key);
} else if (typeof key === "bigint") {
numericKeys.push(key);
} else if (typeof key === "string") {
stringKeys.push(key);
} else if (ArrayBuffer.isView(key)) {
binaryKeys.push(ensureUint8Array(key));
} else {
throw new Error(`Unsupported map key type: ${Object.prototype.toString.apply(key)}`);
}
}
numericKeys.sort((a, b) => (a < b ? -1 : a > b ? 1 : 0)); // Avoid using === to compare numbers and bigints
stringKeys.sort();
binaryKeys.sort(compareUint8Arrays);
// At the moment this arbitrarily orders the keys as numeric, string, binary
return ([] as Array<unknown>).concat(numericKeys, stringKeys, binaryKeys);
}

private encodeMapObject(object: Record<string, unknown>, depth: number) {
this.encodeMap(new Map<string, unknown>(Object.entries(object)), depth);
}

private encodeMap(map: Map<unknown, unknown>, depth: number) {
let keys = Array.from(map.keys());
if (this.sortKeys) {
keys.sort();
keys = this.sortMapKeys(keys);
}

const size = this.ignoreUndefined ? this.countWithoutUndefined(object, keys) : keys.length;
// Map keys may encode to the same underlying value. For example, the number 3 and the bigint 3.
// This is also possible with ArrayBufferViews. We may want to introduce a new encoding option
// which checks for duplicate keys in this sense and throws an error if they are found.

const size = this.ignoreUndefined ? this.countWithoutUndefined(map, keys) : keys.length;

if (size < 16) {
// fixmap
Expand All @@ -407,10 +453,20 @@ export class Encoder<ContextType = undefined> {
}

for (const key of keys) {
const value = object[key];
const value = map.get(key);

if (!(this.ignoreUndefined && value === undefined)) {
this.encodeString(key);
if (typeof key === "string") {
this.encodeString(key);
} else if (typeof key === "number") {
this.encodeNumber(key);
} else if (typeof key === "bigint") {
this.encodeBigInt(key);
} else if (ArrayBuffer.isView(key)) {
this.encodeBinary(key);
} else {
throw new Error(`Unsupported map key type: ${Object.prototype.toString.apply(key)}`);
}
this.doEncode(value, depth + 1);
}
}
Expand Down
11 changes: 11 additions & 0 deletions src/utils/typedArrays.ts
Original file line number Diff line number Diff line change
Expand Up @@ -19,3 +19,14 @@ export function createDataView(buffer: ArrayLike<number> | ArrayBufferView | Arr
const bufferView = ensureUint8Array(buffer);
return new DataView(bufferView.buffer, bufferView.byteOffset, bufferView.byteLength);
}

export function compareUint8Arrays(a: Uint8Array, b: Uint8Array): number {
const length = Math.min(a.length, b.length);
for (let i = 0; i < length; i++) {
const diff = a[i]! - b[i]!;
if (diff !== 0) {
return diff;
}
}
return a.length - b.length;
}
30 changes: 15 additions & 15 deletions test/codec-bigint.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -253,24 +253,24 @@ describe("codec BigInt", () => {
const encoded = encode(value, { extensionCodec });
assert.deepStrictEqual(decode(encoded, { extensionCodec }), value);
});
});

it("encodes and decodes 100n", () => {
const value = BigInt(100);
const encoded = encode(value, { extensionCodec });
assert.deepStrictEqual(decode(encoded, { extensionCodec }), value);
});
it("encodes and decodes 100n", () => {
const value = BigInt(100);
const encoded = encode(value, { extensionCodec });
assert.deepStrictEqual(decode(encoded, { extensionCodec }), value);
});

it("encodes and decodes -100n", () => {
const value = BigInt(-100);
const encoded = encode(value, { extensionCodec });
assert.deepStrictEqual(decode(encoded, { extensionCodec }), value);
});
it("encodes and decodes -100n", () => {
const value = BigInt(-100);
const encoded = encode(value, { extensionCodec });
assert.deepStrictEqual(decode(encoded, { extensionCodec }), value);
});

it("encodes and decodes MAX_SAFE_INTEGER+1", () => {
const value = BigInt(Number.MAX_SAFE_INTEGER) + BigInt(1);
const encoded = encode(value, { extensionCodec });
assert.deepStrictEqual(decode(encoded, { extensionCodec }), value);
it("encodes and decodes MAX_SAFE_INTEGER+1", () => {
const value = BigInt(Number.MAX_SAFE_INTEGER) + BigInt(1);
const encoded = encode(value, { extensionCodec });
assert.deepStrictEqual(decode(encoded, { extensionCodec }), value);
});
});

context("native", () => {
Expand Down
Loading
Loading