// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT license.

- import ndarray from 'ndarray';
- import matrixProduct from 'ndarray-gemm';
- import nd_ops from 'ndarray-ops';
-
import {Conv} from '../../../ops/conv';
import {Tensor} from '../../../tensor';
import {PoolConvUtil} from '../../../util';
import {CpuInferenceHandler} from '../inference-handler';
+ import {matMul2d} from './matmul';

export class CpuConv extends Conv {
  run(inferenceHandler: CpuInferenceHandler, inputs: Tensor[]): Tensor[] {
+     const x = inputs[0];
+     const w = inputs[1];
+     const b = inputs.length === 3 ? inputs[2] : undefined;
+
    // if kernelShape is not specified in the attributes of this op, infer it from the weight tensor dims
    if (this.kernelShape.length === 0) {
      const wDims = inputs[1].dims;
@@ -20,88 +22,71 @@ export class CpuConv extends Conv {
      }
    }

-     const output = conv(
-         inputs[0], inputs[1], inputs.length === 3 ? inputs[2] : null, this.autoPad, this.dilations, this.group,
-         this.kernelShape, this.pads, this.strides);
-     return [output];
-   }
- }
-
- export function conv(
-     x: Tensor, w: Tensor, b: Tensor|null, autoPad: string, dilations: number[], group: number, kernelShape: number[],
-     pads: number[], strides: number[]): Tensor {
-   let ndx = ndarray(x.floatData as Float32Array, x.dims.slice(0)).transpose(0, 2, 3, 1);
-   const ndk = ndarray(w.floatData as Float32Array, w.dims.slice(0)).transpose(2, 3, 1, 0);
-
-   // adjusting pads based on 'autoPad' attribute
-   PoolConvUtil.adjustPadsBasedOnAutoPad(x.dims, strides, dilations, kernelShape, pads, autoPad);
-
-   // padding if needed
-   const localPads: Array<[number, number]> = [[0, 0], [pads[0], pads[2]], [pads[1], pads[3]], [0, 0]];
-   const padTotal = localPads.reduce((s, p) => s + p[0] + p[1], 0);
-   if (padTotal !== 0) {
-     const shape: number[] = ndx.shape;
-     const newShape = shape.map((len, index) => len + localPads[index][0] + localPads[index][1]);
-     const newSize = newShape.reduce((m, v) => m * v, 1);
-     const ndp = ndarray(new Float32Array(newSize), newShape);
-     const hiPoint = localPads.map((pair, index) => newShape[index] - pair[1]);
-     const loPoint = localPads.map(pair => pair[0]);
-     const originalSlice = ndp.hi(...hiPoint).lo(...loPoint);
-     nd_ops.assign(originalSlice, ndx);
-     ndx = ndp;
-   }
-
-   const [batchSize, xRows, xCols, xChannels] = ndx.shape;
-   const [wRows, wCols, yChannels] = [ndk.shape[0], ndk.shape[1], ndk.shape[3]];
-
-   // calculate the patch view in source image's size after dilations
-   const pvRows = wRows + (wRows - 1) * (dilations[0] - 1);
-   const pvCols = wCols + (wCols - 1) * (dilations[1] - 1);
+     // create output Tensor after determining output size (after adjusting pads based on 'autoPad' attribute)
+     const outputDims = PoolConvUtil.computeConvOutputShape(
+         x.dims, w.dims, this.strides, this.dilations, this.kernelShape, this.pads, this.autoPad);
+     const y = new Tensor(outputDims, x.type);

-   const yRows = Math.floor((xRows - pvRows + strides[0]) / strides[0]);
-   const yCols = Math.floor((xCols - pvCols + strides[1]) / strides[1]);
-
-   const ySize = batchSize * yRows * yCols * yChannels;
-   const patchSize = wRows * wCols * xChannels;
-
-   const ndf = ndarray(new Float64Array(ndk.size), [patchSize, yChannels]);
-   const patch = ndarray(new Float64Array(patchSize), [wRows, wCols, xChannels]);
-   for (let yChannel = 0; yChannel < yChannels; ++yChannel) {
-     nd_ops.assign(patch, ndk.pick(null, null, null, yChannel));
-     const reshapedPatch = ndarray(patch.data, [patchSize]);
-     nd_ops.assign(ndf.pick(null, yChannel), reshapedPatch);
+     conv2d(y, x, w, b, this.dilations, this.group, this.pads, this.strides);
+     return [y];
  }
+ }

-   const yArray = new Float64Array(ySize);
-   const pixelVec = ndarray(new Float64Array(yChannels), [1, yChannels]);
-   let offset = 0;
-   for (let b = 0; b < batchSize; ++b) {
-     const image = ndx.pick(b, null, null, null);
-     for (let yRow = 0; yRow < yRows; ++yRow) {
-       const xRowStart = yRow * strides[0];
-       for (let yCol = 0; yCol < yCols; ++yCol) {
-         const xColStart = yCol * strides[1];
-
-         const patchView = image.hi(xRowStart + pvRows, xColStart + pvCols, xChannels)
-                               .lo(xRowStart, xColStart, 0)
-                               .step(dilations[0], dilations[1], 1);
-         nd_ops.assign(patch, patchView);
-         const pvVec = ndarray(patch.data, [1, patchSize]);
-         matrixProduct(pixelVec, pvVec, ndf);
-         yArray.set(pixelVec.data, offset);
-         offset += yChannels;
-       }
+ // tslint:disable:variable-name
+ export function conv2d(
+     Y: Tensor, X: Tensor, W: Tensor, B: Tensor|undefined, dilations: ReadonlyArray<number>, group: number,
+     pads: ReadonlyArray<number>, strides: ReadonlyArray<number>): void {
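+   // Lower each (image, group) slice of X into a column buffer with im2col, then
+   // multiply it by the matching filter slice (matMul2d) to fill one slice of Y.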
+   const input_num = X.dims[0];
+   const input_channels = X.dims[1];
+   const input_height = X.dims[2];
+   const input_width = X.dims[3];
+
+   const filter_num = W.dims[0];
+   const filter_channels = W.dims[1];
+   const filter_height = W.dims[2];
+   const filter_width = W.dims[3];
+   const filter_size = filter_num * filter_channels * filter_height * filter_width;
+   const kernel_shape = [filter_height, filter_width];
+
+   const output_num = Y.dims[0];
+   const output_channels = Y.dims[1];
+   const output_height = Y.dims[2];
+   const output_width = Y.dims[3];
+   const output_size = output_num * output_channels * output_height * output_width;
+
+   const input_image_size = input_height * input_width;
+   const output_image_size = output_height * output_width;
+   const kernel_size = kernel_shape[0] * kernel_shape[1];
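+   // Per-group strides: how far to advance within X, Y and W to reach the next group.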
+   const X_offset = input_channels / group * input_image_size;
+   const Y_offset = output_size / output_num / group;
+   const W_offset = filter_size / group;
+   const kernel_dim = input_channels / group * kernel_size;
+   const col_buffer_size = kernel_dim * output_image_size;
+
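+   // A single column buffer, reused for every (image, group) pair.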
+   const col_buffer_data = new Float32Array(col_buffer_size);
+
+   // Running offsets of the current image within X and Y; declared outside the image
+   // loop so the per-image increments below accumulate across iterations.
+   let X_image_offset = 0;
+   let Y_image_offset = 0;
+   for (let image_id = 0; image_id < input_num; ++image_id) {
+     for (let group_id = 0; group_id < group; ++group_id) {
+       im2col(
+           X.floatData.subarray(X_image_offset + group_id * X_offset), col_buffer_data, input_channels / group,
+           input_height, input_width, kernel_shape[0], kernel_shape[1], dilations[0], dilations[1], pads[0], pads[1],
+           pads[2], pads[3], strides[0], strides[1]);
+
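+       // GEMM: [filter_num/group, kernel_dim] x [kernel_dim, output_image_size]
+       // -> [filter_num/group, output_image_size], written directly into Y's buffer.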
+       matMul2d(
+           W.floatData.subarray(group_id * W_offset), col_buffer_data,
+           Y.floatData.subarray(Y_image_offset + group_id * Y_offset), false, false, 1, 0, filter_num / group,
+           output_image_size, kernel_dim);
    }
+
+     X_image_offset += X_offset * group;
+     Y_image_offset += Y_offset * group;
  }
-   const ndy = ndarray(yArray, [batchSize, yRows, yCols, yChannels]);
-   const ndyTransed = ndarray(new Float32Array(ySize), [batchSize, yChannels, yRows, yCols]);
-   nd_ops.assign(ndyTransed, ndy.transpose(0, 3, 1, 2));
-   const Y = new Tensor(ndyTransed.shape, 'float32');
-   Y.floatData.set(ndyTransed.data);

  // Add bias if applicable
- if (b) {
-   const biasData = b.numberData;
+   if (B) {
+     const biasData = B.floatData;
    const outputData = Y.floatData;
    const batchSize = Y.dims[0];
    const outputChannels = Y.dims[1];
@@ -116,6 +101,65 @@ export function conv(
      }
    }
  }
+ }
+
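+ // im2col unfolds the input image into a column buffer in which each column holds one
+ // receptive-field patch, so convolution reduces to a matrix multiplication. For
+ // example, a 1x4x4 input with a 3x3 kernel, stride 1 and no padding yields a 2x2
+ // output and a [9 x 4] column buffer.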
+ function im2col(
+     data_im: Float32Array|Float64Array, data_col: Float32Array|Float64Array, channels: number, height: number,
+     width: number, kernel_h: number, kernel_w: number, dilation_h: number, dilation_w: number, pad_t: number,
+     pad_l: number, pad_b: number, pad_r: number, stride_h: number, stride_w: number) {
+   const output_h = ~~((height + pad_b + pad_t - (dilation_h * (kernel_h - 1) + 1)) / stride_h) + 1;
+   const output_w = ~~((width + pad_l + pad_r - (dilation_w * (kernel_w - 1) + 1)) / stride_w) + 1;
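+   // '~~' truncates toward zero, which equals Math.floor() for the non-negative
+   // values computed here.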
+
+   // Fast path for zero padding and no dilation
+   // From Torch, THNN_(unfolded_copy)
+   if (dilation_h === 1 && dilation_w === 1 && pad_l === 0 && pad_r === 0 && pad_t === 0 && pad_b === 0) {
+     for (let k = 0; k < channels * kernel_h * kernel_w; k++) {
+       const nip = ~~(k / (kernel_h * kernel_w));
+       const rest = k % (kernel_h * kernel_w);
+       const kh = ~~(rest / kernel_w);
+       const kw = rest % kernel_w;
+       const dst_offset = nip * (kernel_h * kernel_w * output_h * output_w) + kh * (kernel_w * output_h * output_w) +
+           kw * (output_h * output_w);
+       const src_offset = nip * (height * width);
+       for (let y = 0; y < output_h; y++) {
+         const iy = y * stride_h + kh;
+         const ix = kw;
+         if (stride_w === 1) {
+           data_col.set(
+               data_im.subarray(src_offset + iy * width + ix, src_offset + iy * width + ix + output_w),
+               dst_offset + y * output_w);
+         } else {
+           for (let x = 0; x < output_w; x++) {
+             data_col[dst_offset + (y * output_w + x)] = data_im[src_offset + (iy * width + ix + x * stride_w)];
+           }
+         }
+       }
+     }
+     return;
+   }

-   return Y;
+   // Baseline path: handles arbitrary padding and dilation.
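+   // Effective kernel extents once dilation is applied.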
+   const dkernel_h = dilation_h * (kernel_h - 1) + 1;
+   const dkernel_w = dilation_w * (kernel_w - 1) + 1;
+
+   const height_col = ~~((height + pad_t + pad_b - dkernel_h) / stride_h) + 1;
+   const width_col = ~~((width + pad_l + pad_r - dkernel_w) / stride_w) + 1;
+
+   const channels_col = channels * kernel_h * kernel_w;
+   for (let c = 0; c < channels_col; ++c) {
+     const w_offset = c % kernel_w;
+     const h_offset = ~~(c / kernel_w) % kernel_h;
+     const c_im = ~~(c / (kernel_h * kernel_w));
+     for (let h = 0; h < height_col; ++h) {
+       for (let w = 0; w < width_col; ++w) {
+         const h_pad = h * stride_h - pad_t + h_offset * dilation_h;
+         const w_pad = w * stride_w - pad_l + w_offset * dilation_w;
+         if (h_pad >= 0 && h_pad < height && w_pad >= 0 && w_pad < width) {
+           data_col[(c * height_col + h) * width_col + w] = data_im[(c_im * height + h_pad) * width + w_pad];
+         } else {
+           data_col[(c * height_col + h) * width_col + w] = 0;
+         }
+       }
+     }
+   }
}