@@ -41,111 +41,21 @@ module function layernorm_layer_cons() &
     end function layernorm_layer_cons
   end interface layernorm_layer
 
-contains
-  module function layernorm_layer_cons() &
-      result(res)
-    type(layernorm_layer) :: res
-
-    res % eps = 1e-5
-  end function layernorm_layer_cons
-
-  pure module subroutine forward(self, input)
-    class(layernorm_layer), intent(in out) :: self
-    real, intent(in) :: input(:, :)
-    real, allocatable :: normalized(:, :)
-    integer :: i
-
-    allocate(normalized(self % sequence_length, self % model_dimension))
-
-    ! mu = x - MEAN_last_dim(x)
-    do concurrent(i = 1: self % model_dimension)
-      self % mu(:, i) = input(:, i) - (sum(input, dim=2) / self % model_dimension)
-    end do
-
-    ! square root of variance shifted by eps
-    self % sigma = sqrt((sum(self % mu ** 2, dim=2) / self % model_dimension) + self % eps)
-
-    ! normalize mu by sigma along the first axis
-    do concurrent(i = 1: self % model_dimension)
-      normalized(:, i) = self % mu(:, i) / self % sigma
-    end do
-
-    ! forward through trainable params gamma and beta
-    do concurrent(i = 1: self % sequence_length)
-      self % output(i, :) = normalized(i, :) * self % gamma + self % beta
-    end do
-
-    deallocate(normalized)
-  end subroutine forward
-
-  pure module subroutine backward(self, input, gradient)
-    class(layernorm_layer), intent(in out) :: self
-    real, intent(in) :: input(:, :)
-    real, intent(in) :: gradient(:, :)
-    real, allocatable :: one_over_sigma(:, :)
-    real, allocatable :: gradient_by_gamma_over_sigma(:, :)
-
-    allocate(one_over_sigma(self % sequence_length, self % model_dimension))
-    allocate(gradient_by_gamma_over_sigma(self % sequence_length, self % model_dimension))
-
-    one_over_sigma = (1 / spread(self % sigma, dim=2, ncopies=self % model_dimension))
-    gradient_by_gamma_over_sigma = &
-        gradient &
-        * spread(self % gamma, dim=1, ncopies=self % sequence_length) &
-        * one_over_sigma
-
-    ! d_output/d_gamma = sum(d_output/d_y * mu/sigma)
-    self % d_gamma = sum(gradient * self % mu * one_over_sigma, dim=1)
-
-    ! d_output/d_beta = sum(d_output/d_y) * 1
-    self % d_beta = sum(gradient, dim=1)
-
-    ! From this article:
-    ! https://robotchinwag.com/posts/layer-normalization-deriving-the-gradient-for-the-backward-pass/
-    ! d_output/d_x = d_output/d_y * gamma/sigma
-    !     - d_output/d_y
-    !     - sum(d_output/d_y * gamma/sigma) / len
-    !     - mu * sum(d_output/d_y * gamma * mu * sigma^(-3)) / len
-    self % gradient = &
-        gradient_by_gamma_over_sigma &
-        - spread( &
-            sum(gradient_by_gamma_over_sigma, dim=2), &
-            dim=2, &
-            ncopies=self % model_dimension &
-          ) / self % model_dimension &
-        - self % mu * spread( &
-            sum(gradient_by_gamma_over_sigma * self % mu * (one_over_sigma ** 2), dim=2), &
-            dim=2, &
-            ncopies=self % model_dimension &
-          ) / self % model_dimension
-
-    deallocate(one_over_sigma)
-    deallocate(gradient_by_gamma_over_sigma)
-  end subroutine backward
-
-  module subroutine init(self, input_shape)
-    class(layernorm_layer), intent(in out) :: self
-    integer, intent(in) :: input_shape(:)
-
-    if (size(input_shape) /= 2) then
-      error stop "LayerNorm Layer accepts 2D input"
-    end if
-    self % sequence_length = input_shape(1)
-    self % model_dimension = input_shape(2)
-
-    ! default initialization from PyTorch
-    allocate(self % gamma(self % model_dimension))
-    self % gamma = 1.
-    allocate(self % beta(self % model_dimension))
-    self % beta = 0.
-
-    allocate(self % d_gamma(self % model_dimension))
-    allocate(self % d_beta(self % model_dimension))
-    allocate(self % gradient(self % sequence_length, self % model_dimension))
-
-    allocate(self % mu(self % sequence_length, self % model_dimension))
-    allocate(self % sigma(self % sequence_length))
-
-    allocate(self % output(self % sequence_length, self % model_dimension))
-  end subroutine init
+  interface
+    pure module subroutine forward(self, input)
+      class(layernorm_layer), intent(in out) :: self
+      real, intent(in) :: input(:, :)
+    end subroutine forward
+
+    pure module subroutine backward(self, input, gradient)
+      class(layernorm_layer), intent(in out) :: self
+      real, intent(in) :: input(:, :)
+      real, intent(in) :: gradient(:, :)
+    end subroutine backward
+
+    module subroutine init(self, input_shape)
+      class(layernorm_layer), intent(in out) :: self
+      integer, intent(in) :: input_shape(:)
+    end subroutine init
+  end interface
 end module nf_layernorm_layer
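
The change above removes the procedure bodies from the module and replaces them with module procedure interfaces, so the implementations are expected to move into a companion submodule. Below is a minimal sketch of what that companion file could look like; the submodule name is an assumption that does not appear in this hunk, and only the constructor body is shown in full.

  ! Hypothetical companion submodule; the name is an assumption.
  submodule(nf_layernorm_layer) nf_layernorm_layer_submodule
    implicit none
  contains

    ! Constructor: only sets the numerical-stability epsilon.
    module function layernorm_layer_cons() result(res)
      type(layernorm_layer) :: res
      res % eps = 1e-5
    end function layernorm_layer_cons

    ! forward, backward, and init would follow the same pattern,
    ! with their bodies moved verbatim from the deleted contains
    ! section above, e.g.:
    pure module subroutine forward(self, input)
      class(layernorm_layer), intent(in out) :: self
      real, intent(in) :: input(:, :)
      ! ... body as removed above ...
    end subroutine forward

  end submodule nf_layernorm_layer_submodule

Keeping only interfaces in nf_layernorm_layer means code that uses the module does not have to be recompiled when a procedure body changes, which is the usual motivation for this interface/submodule split.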