@@ -7,17 +7,17 @@ \section*{Overview}
\centering
\hspace*{-1cm}\begin{tabular}{lllll}
\toprule
- Name & Function $\varphi(x)$ & Range of Values & $\varphi'(x)$ \\\midrule % & Used by
- Sign function$^\dagger$ & $\begin{cases}+1 &\text{if } x \geq 0 \\ -1 &\text{if } x < 0\end{cases}$ & $\Set{-1,1}$ & $0$ \\ % & \cite{971754} \\
- \parbox[t]{2.6cm}{Heaviside\\ step function$^\dagger$} & $\begin{cases}+1 &\text{if } x > 0 \\ 0 &\text{if } x < 0\end{cases}$ & $\Set{0, 1}$ & $0$ \\ % & \cite{mcculloch1943logical}\\
- Logistic function & $\frac{1}{1+e^{-x}}$ & $[0, 1]$ & $\frac{e^x}{(e^x + 1)^2}$ \\ % & \cite{duch1999survey} \\
- Tanh & $\frac{e^x - e^{-x}}{e^x + e^{-x}} = \tanh(x)$ & $[-1, 1]$ & $\sech^2(x)$ \\ % & \cite{LeNet-5,Thoma:2014}\\
- \gls{ReLU}$^\dagger$ & $\max(0, x)$ & $[0, +\infty)$ & $\begin{cases}1 &\text{if } x > 0 \\ 0 &\text{if } x < 0\end{cases}$ \\ % & \cite{AlexNet-2012}\\
- \parbox[t]{2.6cm}{\gls{LReLU}$^\dagger$\footnotemark\\ (\gls{PReLU})} & $\varphi(x) = \max(\alpha x, x)$ & $(-\infty, +\infty)$ & $\begin{cases}1 &\text{if } x > 0 \\ \alpha &\text{if } x < 0\end{cases}$ \\ % & \cite{maas2013rectifier,he2015delving} \\
- Softplus & $\log(e^x + 1)$ & $(0, +\infty)$ & $\frac{e^x}{e^x + 1}$ \\ % & \cite{dugas2001incorporating,glorot2011deep} \\
- \gls{ELU} & $\begin{cases}x &\text{if } x > 0 \\ \alpha (e^x - 1) &\text{if } x \leq 0\end{cases}$ & $(-\infty, +\infty)$ & $\begin{cases}1 &\text{if } x > 0 \\ \alpha e^x &\text{otherwise}\end{cases}$ \\ % & \cite{clevert2015fast} \\
- Softmax$^\ddagger$ & $o(\mathbf{x})_j = \frac{e^{x_j}}{\sum_{k=1}^K e^{x_k}}$ & $[0, 1]^K$ & $o(\mathbf{x})_j \cdot \frac{\sum_{k=1}^K e^{x_k} - e^{x_j}}{\sum_{k=1}^K e^{x_k}}$ \\ % & \cite{AlexNet-2012,Thoma:2014}\\
- Maxout$^\ddagger$ & $o(\mathbf{x}) = \max_{x \in \mathbf{x}} x$ & $(-\infty, +\infty)$ & $\begin{cases}1 &\text{if } x_i = \max \mathbf{x}\\ 0 &\text{otherwise}\end{cases}$ \\ % & \cite{goodfellow2013maxout} \\
+ Name & Function $\varphi(x)$ & Range of Values & $\varphi'(x)$ & Used by \\\midrule
+ Sign function$^\dagger$ & $\begin{cases}+1 &\text{if } x \geq 0 \\ -1 &\text{if } x < 0\end{cases}$ & $\Set{-1,1}$ & $0$ & \cite{971754} \\
+ \parbox[t]{2.6cm}{Heaviside\\ step function$^\dagger$} & $\begin{cases}+1 &\text{if } x > 0 \\ 0 &\text{if } x < 0\end{cases}$ & $\Set{0, 1}$ & $0$ & \cite{mcculloch1943logical}\\
+ Logistic function & $\frac{1}{1+e^{-x}}$ & $[0, 1]$ & $\frac{e^x}{(e^x + 1)^2}$ & \cite{duch1999survey} \\
+ Tanh & $\frac{e^x - e^{-x}}{e^x + e^{-x}} = \tanh(x)$ & $[-1, 1]$ & $\sech^2(x)$ & \cite{LeNet-5,Thoma:2014}\\
+ \gls{ReLU}$^\dagger$ & $\max(0, x)$ & $[0, +\infty)$ & $\begin{cases}1 &\text{if } x > 0 \\ 0 &\text{if } x < 0\end{cases}$ & \cite{AlexNet-2012}\\
+ \parbox[t]{2.6cm}{\gls{LReLU}$^\dagger$\footnotemark\\ (\gls{PReLU})} & $\varphi(x) = \max(\alpha x, x)$ & $(-\infty, +\infty)$ & $\begin{cases}1 &\text{if } x > 0 \\ \alpha &\text{if } x < 0\end{cases}$ & \cite{maas2013rectifier,he2015delving} \\
+ Softplus & $\log(e^x + 1)$ & $(0, +\infty)$ & $\frac{e^x}{e^x + 1}$ & \cite{dugas2001incorporating,glorot2011deep} \\
+ \gls{ELU} & $\begin{cases}x &\text{if } x > 0 \\ \alpha (e^x - 1) &\text{if } x \leq 0\end{cases}$ & $(-\infty, +\infty)$ & $\begin{cases}1 &\text{if } x > 0 \\ \alpha e^x &\text{otherwise}\end{cases}$ & \cite{clevert2015fast} \\
+ Softmax$^\ddagger$ & $o(\mathbf{x})_j = \frac{e^{x_j}}{\sum_{k=1}^K e^{x_k}}$ & $[0, 1]^K$ & $o(\mathbf{x})_j \cdot \frac{\sum_{k=1}^K e^{x_k} - e^{x_j}}{\sum_{k=1}^K e^{x_k}}$ & \cite{AlexNet-2012,Thoma:2014}\\
+ Maxout$^\ddagger$ & $o(\mathbf{x}) = \max_{x \in \mathbf{x}} x$ & $(-\infty, +\infty)$ & $\begin{cases}1 &\text{if } x_i = \max \mathbf{x}\\ 0 &\text{otherwise}\end{cases}$ & \cite{goodfellow2013maxout} \\
\bottomrule
\end{tabular}
\caption[Activation functions]{Overview of activation functions. Functions
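The derivative given for softmax in this table is the diagonal term of the softmax Jacobian. A minimal sketch in the table's own notation, showing only $\partial o_j/\partial x_j$ via the quotient rule:

\begin{align*}
\frac{\partial o(\mathbf{x})_j}{\partial x_j}
  &= \frac{e^{x_j} \sum_{k=1}^K e^{x_k} - e^{x_j} \cdot e^{x_j}}{\bigl(\sum_{k=1}^K e^{x_k}\bigr)^2}
   = o(\mathbf{x})_j \cdot \frac{\sum_{k=1}^K e^{x_k} - e^{x_j}}{\sum_{k=1}^K e^{x_k}}
   = o(\mathbf{x})_j \bigl(1 - o(\mathbf{x})_j\bigr).
\end{align*}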
@@ -63,13 +63,11 @@ \section*{Evaluation Results}
\end{tabular}
\caption[Activation function evaluation results on CIFAR-100]{Training and
test accuracy of adjusted baseline models trained with different
- activation functions on CIFAR-100. For LReLU, $\alpha = 0.3$ was
+ activation functions on CIFAR-100. For \gls{LReLU}, $\alpha = 0.3$ was
chosen.}
\label{table:CIFAR-100-accuracies-activation-functions}
\end{table}

- \glsreset{LReLU}
-
\begin{table}[H]
\centering
\setlength\tabcolsep{1.5pt}
@@ -91,7 +89,7 @@ \section*{Evaluation Results}
\end{tabular}
\caption[Activation function evaluation results on HASYv2]{Test accuracy of
adjusted baseline models trained with different activation
- functions on HASYv2. For LReLU, $\alpha = 0.3$ was chosen.}
+ functions on HASYv2. For \gls{LReLU}, $\alpha = 0.3$ was chosen.}
\label{table:HASYv2-accuracies-activation-functions}
\end{table}

@@ -116,8 +114,93 @@ \section*{Evaluation Results}
\end{tabular}
\caption[Activation function evaluation results on STL-10]{Test accuracy of
adjusted baseline models trained with different activation
- functions on STL-10. For LReLU, $\alpha = 0.3$ was chosen.}
+ functions on STL-10. For \gls{LReLU}, $\alpha = 0.3$ was chosen.}
\label{table:STL-10-accuracies-activation-functions}
\end{table}

+ \begin{table}[H]
+ \centering
+ \hspace*{-1cm}\begin{tabular}{lllll}
+ \toprule
+ Name & Function $\varphi(x)$ & Range of Values & $\varphi'(x)$ \\\midrule % & Used by
+ Sign function$^\dagger$ & $\begin{cases}+1 &\text{if } x \geq 0 \\ -1 &\text{if } x < 0\end{cases}$ & $\Set{-1,1}$ & $0$ \\% & \cite{971754} \\
+ \parbox[t]{2.6cm}{Heaviside\\ step function$^\dagger$} & $\begin{cases}+1 &\text{if } x > 0 \\ 0 &\text{if } x < 0\end{cases}$ & $\Set{0, 1}$ & $0$ \\% & \cite{mcculloch1943logical}\\
+ Logistic function & $\frac{1}{1+e^{-x}}$ & $[0, 1]$ & $\frac{e^x}{(e^x + 1)^2}$ \\% & \cite{duch1999survey} \\
+ Tanh & $\frac{e^x - e^{-x}}{e^x + e^{-x}} = \tanh(x)$ & $[-1, 1]$ & $\sech^2(x)$ \\% & \cite{LeNet-5,Thoma:2014}\\
+ \gls{ReLU}$^\dagger$ & $\max(0, x)$ & $[0, +\infty)$ & $\begin{cases}1 &\text{if } x > 0 \\ 0 &\text{if } x < 0\end{cases}$ \\% & \cite{AlexNet-2012}\\
+ \parbox[t]{2.6cm}{\gls{LReLU}$^\dagger$\footnotemark\\ (\gls{PReLU})} & $\varphi(x) = \max(\alpha x, x)$ & $(-\infty, +\infty)$ & $\begin{cases}1 &\text{if } x > 0 \\ \alpha &\text{if } x < 0\end{cases}$ \\% & \cite{maas2013rectifier,he2015delving} \\
+ Softplus & $\log(e^x + 1)$ & $(0, +\infty)$ & $\frac{e^x}{e^x + 1}$ \\% & \cite{dugas2001incorporating,glorot2011deep} \\
+ \gls{ELU} & $\begin{cases}x &\text{if } x > 0 \\ \alpha (e^x - 1) &\text{if } x \leq 0\end{cases}$ & $(-\infty, +\infty)$ & $\begin{cases}1 &\text{if } x > 0 \\ \alpha e^x &\text{otherwise}\end{cases}$ \\% & \cite{clevert2015fast} \\
+ Softmax$^\ddagger$ & $o(\mathbf{x})_j = \frac{e^{x_j}}{\sum_{k=1}^K e^{x_k}}$ & $[0, 1]^K$ & $o(\mathbf{x})_j \cdot \frac{\sum_{k=1}^K e^{x_k} - e^{x_j}}{\sum_{k=1}^K e^{x_k}}$ \\% & \cite{AlexNet-2012,Thoma:2014}\\
+ Maxout$^\ddagger$ & $o(\mathbf{x}) = \max_{x \in \mathbf{x}} x$ & $(-\infty, +\infty)$ & $\begin{cases}1 &\text{if } x_i = \max \mathbf{x}\\ 0 &\text{otherwise}\end{cases}$ \\% & \cite{goodfellow2013maxout} \\
+ \bottomrule
+ \end{tabular}
+ \caption[Activation functions]{Overview of activation functions. Functions
+ marked with $\dagger$ are not differentiable at 0, and functions
+ marked with $\ddagger$ operate on all elements of a layer
+ simultaneously. The hyperparameter $\alpha \in (0, 1)$ of Leaky
+ ReLU is typically $\alpha = 0.01$; for ELU, $\alpha = 1$ is a
+ common choice. Other activation functions such as randomized leaky
+ ReLUs exist~\cite{xu2015empirical}, but are far less commonly
+ used.\\
+ Some functions are smoothed versions of others, like the logistic
+ function for the Heaviside step function, tanh for the sign
+ function, and softplus for ReLU.\\
+ Softmax is the standard activation function for the last layer of
+ a classification network as it produces a probability
+ distribution. See \Cref{fig:activation-functions-plot} for a plot
+ of some of them.}
+ \label{table:activation-functions-overview}
+ \end{table}
+ \footnotetext{$\alpha$ is a hyperparameter in leaky ReLU, but a learnable parameter in the parametric ReLU function.}
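Since $\alpha$ is learned in PReLU, its gradient with respect to $\alpha$ is also needed during backpropagation. A minimal sketch, assuming the $\max(\alpha x, x)$ form from the table with $0 < \alpha < 1$:

\begin{align*}
\frac{\partial \varphi(x)}{\partial \alpha}
  &= \begin{cases}0 &\text{if } x > 0\\ x &\text{if } x < 0\end{cases}
   = \min(0, x),
\end{align*}

so only inputs with $x < 0$ contribute to the update of $\alpha$.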
+
+ \begin{figure}[ht]
+ \centering
+ \begin{tikzpicture}
+ \definecolor{color1}{HTML}{E66101}
+ \definecolor{color2}{HTML}{FDB863}
+ \definecolor{color3}{HTML}{B2ABD2}
+ \definecolor{color4}{HTML}{5E3C99}
+ \begin{axis}[
+ legend pos=north west,
+ legend cell align={left},
+ axis x line=middle,
+ axis y line=middle,
+ x tick label style={/pgf/number format/fixed,
+ /pgf/number format/fixed zerofill,
+ /pgf/number format/precision=1},
+ y tick label style={/pgf/number format/fixed,
+ /pgf/number format/fixed zerofill,
+ /pgf/number format/precision=1},
+ grid = major,
+ width=16cm,
+ height=8cm,
+ grid style={dashed, gray!30},
+ xmin=-2, % start the diagram at this x-coordinate
+ xmax= 2, % end the diagram at this x-coordinate
+ ymin=-1, % start the diagram at this y-coordinate
+ ymax= 2, % end the diagram at this y-coordinate
+ xlabel=x,
+ ylabel=y,
+ tick align=outside,
+ enlargelimits=false]
+ \addplot[domain=-2:2, color1, ultra thick, samples=500] {1/(1+exp(-x))};
+ \addplot[domain=-2:2, color2, ultra thick, samples=500] {tanh(x)};
+ \addplot[domain=-2:2, color4, ultra thick, samples=500] {max(0, x)};
+ \addplot[domain=-2:2, color4, ultra thick, samples=500, dashed] {ln(exp(x) + 1)};
+ \addplot[domain=-2:2, color3, ultra thick, samples=500, dotted] {ifthenelse(x>0, x, exp(x) - 1)}; % ELU with alpha = 1; max(x, exp(x) - 1) would equal exp(x) - 1 everywhere
+ \addlegendentry{$\varphi_1(x)=\frac{1}{1+e^{-x}}$}
+ \addlegendentry{$\varphi_2(x)=\tanh(x)$}
+ \addlegendentry{$\varphi_3(x)=\max(0, x)$}
+ \addlegendentry{$\varphi_4(x)=\log(e^x + 1)$}
+ \addlegendentry{$\varphi_5(x)=\begin{cases}x &\text{if } x > 0\\ e^x - 1 &\text{otherwise}\end{cases}$}
+ \end{axis}
+ \end{tikzpicture}
+ \caption[Activation functions]{Activation functions plotted in $[-2, +2]$.
+ $\tanh$ and ELU are able to produce negative values. The image of
+ ELU, ReLU and Softplus is not bounded on the positive side, whereas
+ $\tanh$ and the logistic function are always below~1.}
+ \label{fig:activation-functions-plot}
+ \end{figure}
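The remark above that softplus is a smoothed version of ReLU can be made precise with the formulas already listed in the table; a minimal sketch using only those entries:

\begin{align*}
\frac{\mathrm{d}}{\mathrm{d}x} \log(e^x + 1)
  &= \frac{e^x}{e^x + 1}
   = \frac{1}{1 + e^{-x}},
\end{align*}

i.e. the derivative of softplus is exactly the logistic function, and $\log(e^x + 1)$ approaches $\max(0, x)$ at both ends of the axis (it tends to $x$ as $x \to +\infty$ and to $0$ as $x \to -\infty$).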
+
+ \glsreset{LReLU}
\twocolumn