@@ -233,24 +233,41 @@ meanpool!(y::DenseCuArray{T}, x::DenseCuArray{T}, pdims::PoolDims) where T<:CUDNNFloat
 
 
 # Activation
-# in-place for x
-Base.broadcasted(::typeof(NNlib.σ), x::DenseCuArray{T}) where {T<:CUDNNFloat} =
-    (cudnnActivationForward(reshape4D(x), mode=CUDNN_ACTIVATION_SIGMOID, coeff=0.0); return x)
-
-Base.broadcasted(::typeof(NNlib.relu), x::DenseCuArray{T}) where {T<:CUDNNFloat} =
-    (cudnnActivationForward(reshape4D(x), mode=CUDNN_ACTIVATION_RELU, coeff=0.0); return x)
-
-Base.broadcasted(::typeof(NNlib.tanh), x::DenseCuArray{T}) where {T<:CUDNNFloat} =
-    (cudnnActivationForward(reshape4D(x), mode=CUDNN_ACTIVATION_TANH, coeff=0.0); return x)
-
-Base.broadcasted(::typeof(NNlib.relu6), x::DenseCuArray{T}) where {T<:CUDNNFloat} =
-    (cudnnActivationForward(reshape4D(x), mode=CUDNN_ACTIVATION_CLIPPED_RELU, coeff=6.0); return x)
+using Base.Broadcast
+
+for (f, op) in [
+    CUDA.tanh       => (src,dst)->cudnnActivationForward(reshape4D(src), reshape4D(dst),
+                                                         mode=CUDNN_ACTIVATION_TANH),
+    NNlib.σ         => (src,dst)->cudnnActivationForward(reshape4D(src), reshape4D(dst),
+                                                         mode=CUDNN_ACTIVATION_SIGMOID),
+    NNlib.elu       => (src,dst)->cudnnActivationForward(reshape4D(src), reshape4D(dst),
+                                                         mode=CUDNN_ACTIVATION_ELU),
+    NNlib.relu      => (src,dst)->cudnnActivationForward(reshape4D(src), reshape4D(dst),
+                                                         mode=CUDNN_ACTIVATION_RELU),
+    NNlib.relu6     => (src,dst)->cudnnActivationForward(reshape4D(src), reshape4D(dst),
+                                                         mode=CUDNN_ACTIVATION_CLIPPED_RELU,
+                                                         coeff=6.0),
+    NNlib.leakyrelu => (src,dst)->cudnnOpTensor(CUDNN_OP_TENSOR_MAX, reshape4D(src),
+                                                reshape4D(src), reshape4D(dst),
+                                                alpha1=0.01)]
+    @eval begin
+        # in-place
+        function Base.materialize!(dst::DenseCuArray{<:CUDNNFloat},
+                                   bc::Broadcast.Broadcasted{<:Any,<:Any,typeof($f),<:Tuple{DenseCuArray}})
+            $op(bc.args[1], dst)
+            return dst
+        end
 
-Base.broadcasted(::typeof(NNlib.elu), x::DenseCuArray{T}) where {T<:CUDNNFloat} =
-    (cudnnActivationForward(reshape4D(x), mode=CUDNN_ACTIVATION_ELU, coeff=1.0); return x)
+        # out of place
+        function Base.materialize(bc::Broadcast.Broadcasted{<:Any,<:Any,typeof($f),<:Tuple{DenseCuArray}})
+            ElType = Broadcast.combine_eltypes(bc.f, bc.args)
+            dst = similar(bc, ElType)
+            $op(bc.args[1], dst)
+            return dst
+        end
+    end
+end
 
 # CUDNN_ACTIVATION_IDENTITY does not work with cudnnActivationForward
-Base.broadcasted(::typeof(NNlib.identity), x::DenseCuArray{T}) where {T<:CUDNNFloat} = x
-
-Base.broadcasted(::typeof(NNlib.leakyrelu), x::DenseCuArray{T}, a=T(0.01)) where {T<:CUDNNFloat} =
-    (cudnnOpTensor(CUDNN_OP_TENSOR_MAX, reshape4D(x), reshape4D(x), reshape4D(x), alpha1=a); return x)
+# FIXME: put this optimization in GPUArrays' `copyto!` (like Base.Broadcast's `copyto!`)
+Base.broadcasted(::typeof(identity), x::DenseCuArray{T}) where {T<:CUDNNFloat} = x
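For reference, this is how the new overloads get hit in practice: in Julia, `f.(x)` lowers to `Base.materialize(Broadcast.broadcasted(f, x))`, and `y .= f.(x)` lowers to `Base.materialize!(y, Broadcast.broadcasted(f, x))`, so the methods above intercept exactly the un-fused, single-argument activation broadcast. A minimal usage sketch, not part of this commit, assuming a CUDA-capable device and that the definitions above (including `reshape4D` and `CUDNNFloat`) are in scope:

```julia
using CUDA, NNlib

x = CUDA.rand(Float32, 8, 8)

# Out of place: `relu.(x)` lowers to
# `Base.materialize(Broadcast.broadcasted(relu, x))`, which the
# `materialize` overload above routes to cudnnActivationForward.
y = NNlib.relu.(x)

# In place: `y .= relu.(x)` lowers to
# `Base.materialize!(y, Broadcast.broadcasted(relu, x))`, writing the
# activation result into `y` without allocating a new array.
y .= NNlib.relu.(x)
```

Note that because the signatures only match `<:Tuple{DenseCuArray}`, a fused expression like `relu.(x .+ b)` carries a nested `Broadcasted` in `bc.args` and falls through to the generic GPU broadcast path rather than cuDNN.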