
Commit 09b3d36

src: remove ndarray from dependencies (#153)
* remove ndarray from dependencies
* resolve comments
1 parent 6672c5f commit 09b3d36

19 files changed (+538, -761 lines)

lib/api/tensor-impl.ts (+5, -12)

@@ -48,7 +48,6 @@ export class Tensor implements TensorInterface {
   get(...indices: number[]): ElementType;
   get(indices: ReadonlyArray<number>): ElementType;
   get(indices?: ReadonlyArray<number>|number, ...rest: number[]): ElementType {
-    let flatIndices = 0;
     let indexArray: ReadonlyArray<number> = [];
     if (typeof indices === 'number') {
       indexArray = [indices, ...rest];
@@ -67,18 +66,17 @@ export class Tensor implements TensorInterface {
       if (dim >= this.dims[idx]) {
         throw new RangeError(`Input index array dims don't match the tensor dims.`);
       }
-      flatIndices += idx < indexArray.length - 1 ? dim * this.dims.slice(idx + 1).reduce((a, b) => a * b) : dim;
     });
+    const value = this.internalTensor.get(indexArray);
     if (this.type === 'bool') {
-      return this.data[flatIndices] === 1 ? true : false;
+      return value === 1 ? true : false;
     }
-    return this.data[flatIndices];
+    return value;
   }
   set(value: ElementType, ...indices: number[]): void;
   set(value: ElementType, indices: ReadonlyArray<number>): void;
   set(value: ElementType, indices?: ReadonlyArray<number>|number, ...rest: number[]) {
     Utils.matchElementType(this.type, value);
-    let flatIndices = 0;
     let indexArray: ReadonlyArray<number> = [];
     if (typeof indices === 'number') {
       indexArray = [indices, ...rest];
@@ -97,17 +95,12 @@ export class Tensor implements TensorInterface {
       if (dim >= this.dims[idx]) {
         throw new RangeError(`Input index array dims don't match the tensor dims.`);
       }
-      flatIndices += idx < indexArray.length - 1 ? dim * this.dims.slice(idx + 1).reduce((a, b) => a * b) : dim;
     });
 
     if (typeof value === 'boolean') {
-      this.data[flatIndices] = value ? 1 : 0;
-    } else if (typeof value === 'string') {
-      this.data[flatIndices] = value;
-    } else if (ArrayBuffer.isView(this.data)) {
-      this.data.set([value], flatIndices);
+      this.internalTensor.set(indexArray, value ? 1 : 0);
     } else {
-      throw new TypeError(`Value type is not supported. `);
+      this.internalTensor.set(indexArray, value);
     }
   }
 }
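
The get/set overloads no longer flatten indices by hand; they defer to this.internalTensor. For reference, the arithmetic the deleted lines computed inline is the standard row-major offset. A minimal standalone sketch (the helper name is hypothetical, not part of the codebase):

```ts
// Row-major flattening: flatIndex = sum_i indices[i] * (product of dims after axis i).
// This mirrors the inline computation removed from get()/set().
function rowMajorFlatIndex(indices: ReadonlyArray<number>, dims: ReadonlyArray<number>): number {
  let flatIndex = 0;
  for (let i = 0; i < indices.length; i++) {
    const stride = dims.slice(i + 1).reduce((a, b) => a * b, 1);  // 1 for the last axis
    flatIndex += indices[i] * stride;
  }
  return flatIndex;
}

// e.g. for dims [2, 3, 4], the index [1, 2, 3] maps to 1*12 + 2*4 + 3 = 23.
```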

lib/backends/cpu/ops/argMax.ts (+4, -5)

@@ -10,13 +10,13 @@ export class CpuArgMax extends ArgMax {
   }
 }
 
-export function argMax(x: Tensor, axis: number, keepdims: number): Tensor {
+export function argMax(x: Tensor, axis: number, keepdims: boolean): Tensor {
   const rank = x.dims ? x.dims.length : 1;
   axis = ShapeUtil.parseAxis(axis, rank);
-  const outputDims = ReduceUtil.calcReduceShape(x.dims.slice(0), [axis], 1);
+  const outputDims = ReduceUtil.calcReduceShape(x.dims, [axis], true);
   const X = x.data;
   const Y = new Int32Array(ShapeUtil.size(outputDims));
-  const blockSize = axis >= x.dims.length ? 1 : ShapeUtil.size(x.dims.slice(axis + 1));
+  const blockSize = ShapeUtil.sizeFromDimension(x.dims, axis + 1);
   const strides = ShapeUtil.computeStrides(outputDims);
   const inputStrides = ShapeUtil.computeStrides(x.dims);
   const indicesY = new Array(x.dims.length);
@@ -38,6 +38,5 @@ export function argMax(x: Tensor, axis: number, keepdims: number): Tensor {
   }
 
   return new Tensor(
-      keepdims ? outputDims : ReduceUtil.calcReduceShape(x.dims.slice(0), [axis], keepdims), 'int32', undefined,
-      undefined, Y);
+      keepdims ? outputDims : ReduceUtil.calcReduceShape(x.dims, [axis], keepdims), 'int32', undefined, undefined, Y);
 }
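
Note on the helper swap: ShapeUtil.sizeFromDimension(dims, start) is assumed here to return the product of dims[start..] (and 1 when start runs past the last axis), which is exactly what the replaced expression computed. A minimal sketch under that assumption, not the library implementation:

```ts
// Assumed semantics of ShapeUtil.sizeFromDimension(dims, start):
// the number of elements in one block spanned by axes start..rank-1.
function sizeFromDimension(dims: ReadonlyArray<number>, start: number): number {
  let size = 1;
  for (let i = start; i < dims.length; i++) {
    size *= dims[i];
  }
  return size;
}

// e.g. sizeFromDimension([2, 3, 4], 1) === 12, sizeFromDimension([2, 3, 4], 3) === 1
```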

lib/backends/cpu/ops/binary-op.ts (+6, -10)

@@ -1,8 +1,6 @@
 // Copyright (c) Microsoft Corporation. All rights reserved.
 // Licensed under the MIT license.
 
-import ndarray from 'ndarray';
-
 import {Attribute} from '../../../attribute';
 import {BinaryOp} from '../../../ops/binary-op';
 import {Tensor} from '../../../tensor';
@@ -32,19 +30,17 @@ export class CpuBinaryOp extends BinaryOp {
   }
 
   run(inferenceHandler: CpuInferenceHandler, inputs: Tensor[]): Tensor[] {
-    const output = binaryOp(inputs[0], inputs[1], this.opLambda!, this.resultType);
+    const output = binaryOp(inputs[0], inputs[1], this.opLambda!, false, this.resultType);
     return [output];
   }
 }
 
-export function binaryOp(
-    x: Tensor, y: Tensor, opLambda: (e1: number, e2: number) => number, resultType?: Tensor.DataType): Tensor {
-  const result =
-      BroadcastUtil.calc(ndarray(x.numberData, x.dims.slice(0)), ndarray(y.numberData, y.dims.slice(0)), opLambda);
+function binaryOp(
+    x: Tensor, y: Tensor, opLambda: (e1: number, e2: number) => number, inplace: boolean,
+    resultType?: Tensor.DataType): Tensor {
+  const result = BroadcastUtil.calc(x, y, opLambda, inplace, resultType);
   if (!result) {
     throw new Error('not broadcastable');
   }
-  const output = new Tensor(result.shape, resultType ? resultType : x.type);
-  output.numberData.set(result.data);
-  return output;
+  return result;
 }
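
binaryOp now hands the input tensors straight to BroadcastUtil.calc, which is expected to produce the broadcast result tensor itself (its implementation is not shown in this diff). As a rough sketch of the NumPy-style shape rule such a helper typically follows (hypothetical function, not the library API):

```ts
// Right-aligned broadcasting: dimensions are compatible if equal or if either is 1;
// missing leading dimensions count as 1. Returns undefined when incompatible,
// mirroring the `if (!result) throw ...` check above.
function broadcastShape(a: ReadonlyArray<number>, b: ReadonlyArray<number>): number[]|undefined {
  const rank = Math.max(a.length, b.length);
  const out = new Array<number>(rank);
  for (let i = 0; i < rank; i++) {
    const da = a[a.length - 1 - i] ?? 1;
    const db = b[b.length - 1 - i] ?? 1;
    if (da !== db && da !== 1 && db !== 1) {
      return undefined;
    }
    out[rank - 1 - i] = Math.max(da, db);
  }
  return out;
}

// e.g. broadcastShape([2, 3, 4], [3, 1]) -> [2, 3, 4]; broadcastShape([2, 3], [4]) -> undefined
```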

lib/backends/cpu/ops/conv.ts (+124, -80)

@@ -1,17 +1,19 @@
 // Copyright (c) Microsoft Corporation. All rights reserved.
 // Licensed under the MIT license.
 
-import ndarray from 'ndarray';
-import matrixProduct from 'ndarray-gemm';
-import nd_ops from 'ndarray-ops';
-
 import {Conv} from '../../../ops/conv';
 import {Tensor} from '../../../tensor';
 import {PoolConvUtil} from '../../../util';
 import {CpuInferenceHandler} from '../inference-handler';
 
+import {matMul2d} from './matmul';
+
 export class CpuConv extends Conv {
   run(inferenceHandler: CpuInferenceHandler, inputs: Tensor[]): Tensor[] {
+    const x = inputs[0];
+    const w = inputs[1];
+    const b = inputs.length === 3 ? inputs[2] : undefined;
+
     // if kernelShape is not specified in the attributes of this op, infer it from the weight tensor dims
     if (this.kernelShape.length === 0) {
       const wDims = inputs[1].dims;
@@ -20,88 +22,71 @@ export class CpuConv extends Conv {
       }
     }
 
-    const output = conv(
-        inputs[0], inputs[1], inputs.length === 3 ? inputs[2] : null, this.autoPad, this.dilations, this.group,
-        this.kernelShape, this.pads, this.strides);
-    return [output];
-  }
-}
-
-export function conv(
-    x: Tensor, w: Tensor, b: Tensor|null, autoPad: string, dilations: number[], group: number, kernelShape: number[],
-    pads: number[], strides: number[]): Tensor {
-  let ndx = ndarray(x.floatData as Float32Array, x.dims.slice(0)).transpose(0, 2, 3, 1);
-  const ndk = ndarray(w.floatData as Float32Array, w.dims.slice(0)).transpose(2, 3, 1, 0);
-
-  // adjusting pads based on 'autoPad' attribute
-  PoolConvUtil.adjustPadsBasedOnAutoPad(x.dims, strides, dilations, kernelShape, pads, autoPad);
-
-  // padding if needed
-  const localPads: Array<[number, number]> = [[0, 0], [pads[0], pads[2]], [pads[1], pads[3]], [0, 0]];
-  const padTotal = localPads.reduce((s, p) => s + p[0] + p[1], 0);
-  if (padTotal !== 0) {
-    const shape: number[] = ndx.shape;
-    const newShape = shape.map((len, index) => len + localPads[index][0] + localPads[index][1]);
-    const newSize = newShape.reduce((m, v) => m * v, 1);
-    const ndp = ndarray(new Float32Array(newSize), newShape);
-    const hiPoint = localPads.map((pair, index) => newShape[index] - pair[1]);
-    const loPoint = localPads.map(pair => pair[0]);
-    const originalSlice = ndp.hi(...hiPoint).lo(...loPoint);
-    nd_ops.assign(originalSlice, ndx);
-    ndx = ndp;
-  }
-
-  const [batchSize, xRows, xCols, xChannels] = ndx.shape;
-  const [wRows, wCols, yChannels] = [ndk.shape[0], ndk.shape[1], ndk.shape[3]];
-
-  // calculate the patch view in source image's size after dilations
-  const pvRows = wRows + (wRows - 1) * (dilations[0] - 1);
-  const pvCols = wCols + (wCols - 1) * (dilations[1] - 1);
+    // create output Tensor after determining output size (after adjusting pads based on 'autoPad' attribute)
+    const outputDims = PoolConvUtil.computeConvOutputShape(
+        x.dims, w.dims, this.strides, this.dilations, this.kernelShape, this.pads, this.autoPad);
+    const y = new Tensor(outputDims, x.type);
 
-  const yRows = Math.floor((xRows - pvRows + strides[0]) / strides[0]);
-  const yCols = Math.floor((xCols - pvCols + strides[1]) / strides[1]);
-
-  const ySize = batchSize * yRows * yCols * yChannels;
-  const patchSize = wRows * wCols * xChannels;
-
-  const ndf = ndarray(new Float64Array(ndk.size), [patchSize, yChannels]);
-  const patch = ndarray(new Float64Array(patchSize), [wRows, wCols, xChannels]);
-  for (let yChannel = 0; yChannel < yChannels; ++yChannel) {
-    nd_ops.assign(patch, ndk.pick(null, null, null, yChannel));
-    const reshapedPatch = ndarray(patch.data, [patchSize]);
-    nd_ops.assign(ndf.pick(null, yChannel), reshapedPatch);
+    conv2d(y, x, w, b, this.dilations, this.group, this.pads, this.strides);
+    return [y];
   }
+}
 
-  const yArray = new Float64Array(ySize);
-  const pixelVec = ndarray(new Float64Array(yChannels), [1, yChannels]);
-  let offset = 0;
-  for (let b = 0; b < batchSize; ++b) {
-    const image = ndx.pick(b, null, null, null);
-    for (let yRow = 0; yRow < yRows; ++yRow) {
-      const xRowStart = yRow * strides[0];
-      for (let yCol = 0; yCol < yCols; ++yCol) {
-        const xColStart = yCol * strides[1];
-
-        const patchView = image.hi(xRowStart + pvRows, xColStart + pvCols, xChannels)
-                              .lo(xRowStart, xColStart, 0)
-                              .step(dilations[0], dilations[1], 1);
-        nd_ops.assign(patch, patchView);
-        const pvVec = ndarray(patch.data, [1, patchSize]);
-        matrixProduct(pixelVec, pvVec, ndf);
-        yArray.set(pixelVec.data, offset);
-        offset += yChannels;
-      }
+// tslint:disable: variable-name
+export function conv2d(
+    Y: Tensor, X: Tensor, W: Tensor, B: Tensor|undefined, dilations: ReadonlyArray<number>, group: number,
+    pads: ReadonlyArray<number>, strides: ReadonlyArray<number>): void {
+  const input_num = X.dims[0];
+  const input_channels = X.dims[1];
+  const input_height = X.dims[2];
+  const input_width = X.dims[3];
+
+  const filter_num = W.dims[0];
+  const filter_channels = W.dims[1];
+  const filter_height = W.dims[2];
+  const filter_width = W.dims[3];
+  const filter_size = filter_num * filter_channels * filter_height * filter_width;
+  const kernel_shape = [filter_height, filter_width];
+
+  const output_num = Y.dims[0];
+  const output_channels = Y.dims[1];
+  const output_height = Y.dims[2];
+  const output_width = Y.dims[3];
+  const output_size = output_num * output_channels * output_height * output_width;
+
+  const input_image_size = input_height * input_width;
+  const output_image_size = output_height * output_width;
+  const kernel_size = kernel_shape[0] * kernel_shape[1];
+  const X_offset = input_channels / group * input_image_size;
+  const Y_offset = output_size / output_num / group;
+  const W_offset = filter_size / group;
+  const kernel_dim = input_channels / group * kernel_size;
+  const col_buffer_size = kernel_dim * output_image_size;
+
+  const col_buffer_data = new Float32Array(col_buffer_size);
+
+  for (let image_id = 0; image_id < input_num; ++image_id) {
+    let X_image_offset = 0;
+    let Y_image_offset = 0;
+    for (let group_id = 0; group_id < group; ++group_id) {
+      im2col(
+          X.floatData.subarray(X_image_offset + group_id * X_offset), col_buffer_data, input_channels / group,
+          input_height, input_width, kernel_shape[0], kernel_shape[1], dilations[0], dilations[1], pads[0], pads[1],
+          pads[2], pads[3], strides[0], strides[1]);
+
+      matMul2d(
+          W.floatData.subarray(group_id * W_offset), col_buffer_data,
+          Y.floatData.subarray(Y_image_offset + group_id * Y_offset), false, false, 1, 0, filter_num / group,
+          output_image_size, kernel_dim);
     }
+
+    X_image_offset += X_offset * group;
+    Y_image_offset += Y_offset * group;
   }
-  const ndy = ndarray(yArray, [batchSize, yRows, yCols, yChannels]);
-  const ndyTransed = ndarray(new Float32Array(ySize), [batchSize, yChannels, yRows, yCols]);
-  nd_ops.assign(ndyTransed, ndy.transpose(0, 3, 1, 2));
-  const Y = new Tensor(ndyTransed.shape, 'float32');
-  Y.floatData.set(ndyTransed.data);
 
   // Add bias if applicable
-  if (b) {
-    const biasData = b.numberData;
+  if (B) {
+    const biasData = B.floatData;
     const outputData = Y.floatData;
     const batchSize = Y.dims[0];
     const outputChannels = Y.dims[1];
@@ -116,6 +101,65 @@ export function conv(
       }
     }
   }
+}
+
+function im2col(
+    data_im: Float32Array|Float64Array, data_col: Float32Array|Float64Array, channels: number, height: number,
+    width: number, kernel_h: number, kernel_w: number, dilation_h: number, dilation_w: number, pad_t: number,
+    pad_l: number, pad_b: number, pad_r: number, stride_h: number, stride_w: number) {
+  const output_h = ~~((height + pad_b + pad_t - (dilation_h * (kernel_h - 1) + 1)) / stride_h) + 1;
+  const output_w = ~~((width + pad_l + pad_r - (dilation_w * (kernel_w - 1) + 1)) / stride_w) + 1;
+
+  // Fast path for zero padding and no dilation
+  // From Torch, THNN_(unfolded_copy)
+  if (dilation_h === 1 && dilation_w === 1 && pad_l === 0 && pad_r === 0 && pad_t === 0 && pad_b === 0) {
+    for (let k = 0; k < channels * kernel_h * kernel_w; k++) {
+      const nip = ~~(k / (kernel_h * kernel_w));
+      const rest = k % (kernel_h * kernel_w);
+      const kh = ~~(rest / kernel_w);
+      const kw = rest % kernel_w;
+      const dst_offset = nip * (kernel_h * kernel_w * output_h * output_w) + kh * (kernel_w * output_h * output_w) +
+          kw * (output_h * output_w);
+      const src_offset = nip * (height * width);
+      for (let y = 0; y < output_h; y++) {
+        const iy = y * stride_h + kh;
+        const ix = kw;
+        if (stride_w === 1) {
+          data_col.set(
+              data_im.subarray(src_offset + iy * width + ix, src_offset + iy * width + ix + output_w),
+              dst_offset + y * output_w);
+        } else {
+          for (let x = 0; x < output_w; x++) {
+            data_col[dst_offset + (y * output_w + x)] = data_im[src_offset + (iy * width + ix + x * stride_w)];
+          }
+        }
+      }
+    }
+    return;
+  }
 
-  return Y;
+  // Baseline
+  const dkernel_h = dilation_h * (kernel_h - 1) + 1;
+  const dkernel_w = dilation_w * (kernel_w - 1) + 1;
+
+  const height_col = ~~((height + pad_t + pad_b - dkernel_h) / stride_h) + 1;
+  const width_col = ~~((width + pad_l + pad_r - dkernel_w) / stride_w) + 1;
+
+  const channels_col = channels * kernel_h * kernel_w;
+  for (let c = 0; c < channels_col; ++c) {
+    const w_offset = c % kernel_w;
+    const h_offset = ~~(c / kernel_w) % kernel_h;
+    const c_im = ~~(c / (kernel_h * kernel_w));
+    for (let h = 0; h < height_col; ++h) {
+      for (let w = 0; w < width_col; ++w) {
+        const h_pad = h * stride_h - pad_t + h_offset * dilation_h;
+        const w_pad = w * stride_w - pad_l + w_offset * dilation_w;
+        if (h_pad >= 0 && h_pad < height && w_pad >= 0 && w_pad < width) {
+          data_col[(c * height_col + h) * width_col + w] = data_im[(c_im * height + h_pad) * width + w_pad];
+        } else {
+          data_col[(c * height_col + h) * width_col + w] = 0;
        }
      }
    }
  }
 }
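
The rewritten kernel is the usual im2col + GEMM lowering: for each image and each group, im2col unrolls every receptive field into one column of a [kernel_dim x output_image_size] buffer, and matMul2d multiplies the group's filters (viewed as a [filter_num/group x kernel_dim] matrix) by that buffer to fill the output feature maps. A simplified, self-contained sketch of the idea (single channel, single filter, no padding or dilation, stride 1; the names are made up for the example and this is not the library code):

```ts
// Convolution via im2col + matrix multiply, reduced to the 1-channel / 1-filter case.
function convViaIm2col(
    image: number[], h: number, w: number, kernel: number[], kh: number, kw: number): number[] {
  const oh = h - kh + 1;
  const ow = w - kw + 1;
  const K = kh * kw;   // rows of the column buffer (one per kernel element)
  const N = oh * ow;   // one column per output pixel
  const col = new Array<number>(K * N).fill(0);
  // im2col: copy each output pixel's receptive field into its column.
  for (let y = 0; y < oh; y++) {
    for (let x = 0; x < ow; x++) {
      for (let ky = 0; ky < kh; ky++) {
        for (let kx = 0; kx < kw; kx++) {
          col[(ky * kw + kx) * N + (y * ow + x)] = image[(y + ky) * w + (x + kx)];
        }
      }
    }
  }
  // GEMM: the kernel is a [1 x K] row vector, the output a [1 x N] row vector.
  const out = new Array<number>(N).fill(0);
  for (let n = 0; n < N; n++) {
    for (let k = 0; k < K; k++) {
      out[n] += kernel[k] * col[k * N + n];
    }
  }
  return out;
}

// e.g. a 3x3 image with a 2x2 kernel gives a 2x2 output:
// convViaIm2col([1, 2, 3, 4, 5, 6, 7, 8, 9], 3, 3, [1, 0, 0, 1], 2, 2) -> [6, 8, 12, 14]
```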
