From 2ef8afe5bcb8a73483266c3c548c7864b51921d5 Mon Sep 17 00:00:00 2001 From: Trevor Manz Date: Tue, 25 Feb 2025 14:32:23 -0500 Subject: [PATCH 1/2] Support explicit order of dimensions in transpose --- packages/core/src/codecs/bytes.ts | 8 +-- packages/core/src/codecs/transpose.ts | 80 ++++++++++++++++++++++----- packages/core/src/util.ts | 38 +++++++------ 3 files changed, 91 insertions(+), 35 deletions(-) diff --git a/packages/core/src/codecs/bytes.ts b/packages/core/src/codecs/bytes.ts index 619f9e06..596b033a 100644 --- a/packages/core/src/codecs/bytes.ts +++ b/packages/core/src/codecs/bytes.ts @@ -31,10 +31,10 @@ function bytes_per_element( export class BytesCodec> { kind = "array_to_bytes"; - #strides: number[]; + #stride: Array; #TypedArray: TypedArrayConstructor; #BYTES_PER_ELEMENT: number; - #shape: number[]; + #shape: Array; #endian?: "little" | "big"; constructor( @@ -44,7 +44,7 @@ export class BytesCodec> { this.#endian = configuration?.endian; this.#TypedArray = get_ctr(meta.data_type); this.#shape = meta.shape; - this.#strides = get_strides(meta.shape, get_array_order(meta.codecs)); + this.#stride = get_strides(meta.shape, "C"); // TODO: fix me. // hack to get bytes per element since it's dynamic for string types. 
const sample = new this.#TypedArray(0); @@ -77,7 +77,7 @@ export class BytesCodec> { bytes.byteLength / this.#BYTES_PER_ELEMENT, ), shape: this.#shape, - stride: this.#strides, + stride: this.#stride, }; } } diff --git a/packages/core/src/codecs/transpose.ts b/packages/core/src/codecs/transpose.ts index b0628957..66250ed6 100644 --- a/packages/core/src/codecs/transpose.ts +++ b/packages/core/src/codecs/transpose.ts @@ -1,3 +1,4 @@ +import assert from "node:assert"; import type { Chunk, DataType, @@ -41,7 +42,7 @@ function proxy(arr: TypedArray): TypedArrayProxy { function empty_like( chunk: Chunk, - order: "C" | "F", + order: Order, ): Chunk { let data: TypedArray; if ( @@ -67,7 +68,7 @@ function empty_like( function convert_array_order( src: Chunk, - target: "C" | "F", + target: Order, ): Chunk { let out = empty_like(src, target); let n_dims = src.shape.length; @@ -99,30 +100,83 @@ function convert_array_order( return out; } -function get_order(arr: Chunk): "C" | "F" { - // Assume C order if no stride is given - if (!arr.stride) return "C"; - let row_major_strides = get_strides(arr.shape, "C"); - return arr.stride.every((s, i) => s === row_major_strides[i]) ? 
"C" : "F"; +/** Determine the memory order (axis permutation) for a chunk */ +function get_order(chunk: Chunk): number[] { + let rank = chunk.shape.length; + assert( + rank === chunk.stride.length, + "Shape and stride must have the same length.", + ); + return chunk.stride + .map((s, i) => ({ stride: s, index: i })) + .sort((a, b) => b.stride - a.stride) + .map((entry) => entry.index); } +function matches_order(chunk: Chunk, target: Order) { + let source = get_order(chunk); + assert(source.length === target.length, "Orders must match"); + return source.every((dim, i) => dim === target[i]); +} + +type Order = "C" | "F" | Array; + export class TransposeCodec { kind = "array_to_array"; + #order: Array; + #inverseOrder: Array; + + constructor(configuration: { order?: Order }, meta: { shape: number[] }) { + let value = configuration.order ?? "C"; + let rank = meta.shape.length; + let order = new Array(rank); + let inverseOrder = new Array(rank); + + if (value === "C") { + for (let i = 0; i < rank; ++i) { + order[i] = i; + inverseOrder[i] = i; + } + } else if (value === "F") { + for (let i = 0; i < rank; ++i) { + order[i] = rank - i - 1; + inverseOrder[i] = rank - i - 1; + } + } else { + order = value; + order.forEach((x, i) => { + assert( + inverseOrder[x] === undefined, + `Invalid permutation: ${JSON.stringify(value)}`, + ); + inverseOrder[x] = i; + }); + } - constructor(public configuration?: { order: "C" | "F" }) {} + this.#order = order; + this.#inverseOrder = inverseOrder; + } - static fromConfig(configuration: { order: "C" | "F" }) { - return new TransposeCodec(configuration); + static fromConfig( + configuration: { order: Order }, + meta: { shape: number[] }, + ) { + return new TransposeCodec(configuration, meta); } encode(arr: Chunk): Chunk { - if (get_order(arr) === this.configuration?.order) { + if (matches_order(arr, this.#inverseOrder)) { + // can skip making a copy return arr; } - return convert_array_order(arr, this.configuration?.order ?? 
"C"); + return convert_array_order(arr, this.#inverseOrder); } decode(arr: Chunk): Chunk { - return arr; + return { + data: arr.data, + shape: arr.shape, + stride: get_strides(arr.shape, this.#order), + }; } } diff --git a/packages/core/src/util.ts b/packages/core/src/util.ts index 587f3d6a..1a5e554d 100644 --- a/packages/core/src/util.ts +++ b/packages/core/src/util.ts @@ -79,27 +79,29 @@ export function get_ctr( } /** Compute strides for 'C' or 'F' ordered array from shape */ -export function get_strides(shape: readonly number[], order: "C" | "F") { - return (order === "C" ? row_major_stride : col_major_stride)(shape); -} - -function row_major_stride(shape: readonly number[]) { - const ndim = shape.length; - const stride: number[] = globalThis.Array(ndim); - for (let i = ndim - 1, step = 1; i >= 0; i--) { - stride[i] = step; - step *= shape[i]; +export function get_strides( + shape: readonly number[], + order: "C" | "F" | Array, +) { + const rank = shape.length; + if (typeof order === "string") { + order = + order === "C" + ? 
Array.from({ length: rank }, (_, i) => i) // Row-major (identity order) + : Array.from({ length: rank }, (_, i) => rank - 1 - i); // Column-major (reverse order) } - return stride; -} + assert( + rank === order.length, + "Order length must match the number of dimensions.", + ); -function col_major_stride(shape: readonly number[]) { - const ndim = shape.length; - const stride: number[] = globalThis.Array(ndim); - for (let i = 0, step = 1; i < ndim; i++) { - stride[i] = step; - step *= shape[i]; + let step = 1; + let stride = new Array(rank); + for (let i = order.length - 1; i >= 0; i--) { + stride[order[i]] = step; + step *= shape[order[i]]; } + return stride; } From 00afc7ab9216488a8b2081c3799895f2a0cb1139 Mon Sep 17 00:00:00 2001 From: Trevor Manz Date: Tue, 25 Feb 2025 14:51:57 -0500 Subject: [PATCH 2/2] Ensure ordering is respected --- .changeset/late-tables-peel.md | 5 +++++ packages/core/src/codecs/bytes.ts | 7 +------ packages/core/src/hierarchy.ts | 10 +++++++++- packages/core/src/util.ts | 9 +-------- 4 files changed, 16 insertions(+), 15 deletions(-) create mode 100644 .changeset/late-tables-peel.md diff --git a/.changeset/late-tables-peel.md b/.changeset/late-tables-peel.md new file mode 100644 index 00000000..21ee2749 --- /dev/null +++ b/.changeset/late-tables-peel.md @@ -0,0 +1,5 @@ +--- +"@zarrita/core": patch +--- + +Support transpose with explicit permutation diff --git a/packages/core/src/codecs/bytes.ts b/packages/core/src/codecs/bytes.ts index 596b033a..3a4c5ab3 100644 --- a/packages/core/src/codecs/bytes.ts +++ b/packages/core/src/codecs/bytes.ts @@ -4,12 +4,7 @@ import type { DataType, TypedArrayConstructor, } from "../metadata.js"; -import { - byteswap_inplace, - get_array_order, - get_ctr, - get_strides, -} from "../util.js"; +import { byteswap_inplace, get_ctr, get_strides } from "../util.js"; const LITTLE_ENDIAN_OS = system_is_little_endian(); diff --git a/packages/core/src/hierarchy.ts b/packages/core/src/hierarchy.ts index 
dad457df..8d2f7f37 100644 --- a/packages/core/src/hierarchy.ts +++ b/packages/core/src/hierarchy.ts @@ -5,6 +5,7 @@ import type { ArrayMetadata, Attributes, Chunk, + CodecMetadata, DataType, GroupMetadata, Scalar, @@ -19,7 +20,6 @@ import { import { create_chunk_key_encoder, ensure_correct_scalar, - get_array_order, get_ctr, get_strides, } from "./util.js"; @@ -63,6 +63,14 @@ export class Group extends Location { } } +function get_array_order( + codecs: CodecMetadata[], +): "C" | "F" | globalThis.Array { + const maybe_transpose_codec = codecs.find((c) => c.name === "transpose"); + // @ts-expect-error - TODO: Should validate? + return maybe_transpose_codec?.configuration?.order ?? "C"; +} + const CONTEXT_MARKER = Symbol("zarrita.context"); export function get_context(obj: { [CONTEXT_MARKER]: T }): T { diff --git a/packages/core/src/util.ts b/packages/core/src/util.ts index 1a5e554d..8b5482d4 100644 --- a/packages/core/src/util.ts +++ b/packages/core/src/util.ts @@ -121,13 +121,6 @@ export function create_chunk_key_encoder({ throw new Error(`Unknown chunk key encoding: ${name}`); } -export function get_array_order(codecs: CodecMetadata[]): "C" | "F" { - const maybe_transpose_codec = codecs.find((c) => c.name === "transpose"); - return maybe_transpose_codec?.configuration?.order === "F" ? "F" : "C"; -} - -const endian_regex = /^([<|>])(.*)$/; - function coerce_dtype( dtype: string, ): { data_type: DataType } | { data_type: DataType; endian: "little" | "big" } { @@ -135,7 +128,7 @@ function coerce_dtype( return { data_type: "v2:object" }; } - let match = dtype.match(endian_regex); + let match = dtype.match(/^([<|>])(.*)$/); assert(match, `Invalid dtype: ${dtype}`); let [, endian, rest] = match;