Skip to content

Commit

Permalink
Centred RMSProp (#51)
Browse files Browse the repository at this point in the history
* Centred RMSProp

* try making this a keyword

* description, show

* add a d to keyword

* fixup

* require recent Zygote
  • Loading branch information
mcabbott authored May 23, 2022
1 parent 33c8144 commit 2bf9e60
Show file tree
Hide file tree
Showing 3 changed files with 29 additions and 7 deletions.
1 change: 1 addition & 0 deletions Project.toml
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2"
[compat]
ChainRulesCore = "1"
Functors = "0.2.8"
Zygote = "0.6.40"
julia = "1.6"

[extras]
Expand Down
32 changes: 25 additions & 7 deletions src/rules.jl
Original file line number Diff line number Diff line change
Expand Up @@ -84,37 +84,55 @@ function apply!(o::Nesterov, state, x, dx)
end

"""
RMSProp(η = 1f-3, ρ = 9f-1, ϵ = eps(typeof(η)))
RMSProp(η = 1f-3, ρ = 9f-1, ϵ = eps(typeof(η)); centred = false)
Optimizer using the
[RMSProp](https://www.cs.toronto.edu/~tijmen/csc321/slides/lecture_slides_lec6.pdf)
algorithm. Often a good choice for recurrent networks. Parameters other than learning rate
generally don't need tuning.
[Centred RMSProp](https://arxiv.org/abs/1308.0850) is a variant which normalises
gradients by an estimate of their variance, instead of their second moment.
# Parameters
- Learning rate (`η`): Amount by which gradients are discounted before updating
the weights.
- Momentum (`ρ`): Controls the acceleration of gradient descent in the
prominent direction, in effect dampening oscillations.
- Machine epsilon (`ϵ`): Constant to prevent division by zero
(no need to change default)
- Keyword `centred` (or `centered`): Indicates whether to use the centred variant
of the algorithm.
"""
struct RMSProp{T}
  eta::T         # learning rate `η`
  rho::T         # momentum / decay factor `ρ` of the running averages
  epsilon::T     # small constant `ϵ` added to the denominator
  centred::Bool  # whether the centred variant of the algorithm is used
end

# Outer constructor. The type parameter is taken from `η`; `ρ` and `ϵ` are converted
# to that type by the default inner constructor. Both spellings of the keyword are
# accepted, and setting either one to `true` selects the centred variant.
RMSProp(η = 1f-3, ρ = 9f-1, ϵ = eps(typeof(η)); centred::Bool = false, centered::Bool = false) =
  RMSProp{typeof(η)}(η, ρ, ϵ, centred | centered)

# Initial optimiser state for one parameter array: a pair `(quad, lin)`.
# `quad` is a zero array like `x`. `lin` is a second zero array like `x` when
# `o.centred` is true; otherwise it is the scalar `false`, so no extra array is allocated.
init(o::RMSProp, x::AbstractArray) = (zero(x), o.centred ? zero(x) : false)

# One RMSProp step. `state` is the `(quad, lin)` pair built by `init`:
#   - `quad` is the running average of `abs2(dx)`;
#   - `lin` is the running average of `dx`, updated only when `o.centred` is true.
# Returns `((quad, lin), dx′)`, where `dx′` is the rescaled gradient expression.
function apply!(o::RMSProp, state, x, dx)
  η, ρ, ϵ = o.eta, o.rho, o.epsilon
  quad, lin = state

  @.. quad = ρ * quad + (1 - ρ) * abs2(dx)
  if o.centred
    @.. lin = ρ * lin + (1 - ρ) * dx
  end
  # In the non-centred case `init` gives `lin = false`, so `abs2(lin)` is zero and
  # the denominator is just `sqrt(quad) + ϵ`.
  dx′ = @lazy dx * η / (sqrt(quad - abs2(lin)) + ϵ)

  return (quad, lin), dx′
end

# Print an `RMSProp` rule as `Type(eta, rho, epsilon; centred = flag)`.
function Base.show(io::IO, o::RMSProp)
  positional = [o.eta, o.rho, o.epsilon]
  show(io, typeof(o))
  print(io, "(")
  join(io, positional, ", ")
  print(io, "; centred = ", o.centred, ")")
  return nothing
end

"""
Expand Down
3 changes: 3 additions & 0 deletions test/rules.jl
Original file line number Diff line number Diff line change
Expand Up @@ -14,10 +14,13 @@ RULES = [
OptimiserChain(ClipNorm(), Adam(0.001)),
OptimiserChain(ClipGrad(0.5), Momentum()),
OptimiserChain(WeightDecay(), OAdam(), ClipGrad(1)),
# Not the default:
RMSProp(centred = true),
]

# Label for a rule, used only when printing testset headings: the bare name of its type.
function name(o)
  T = typeof(o)
  return T.name.name
end
# A chain is labelled by concatenating the labels of the rules in `o.opts`.
function name(o::OptimiserChain)
  labels = map(name, o.opts)
  return join(labels, "")
end
# Give the centred variant its own label so its testset heading is distinguishable.
function name(o::RMSProp)
  if o.centred
    return "RMSProp(centred = true)"
  else
    return :RMSProp
  end
end

LOG = Dict() # for debugging these testsets, this makes it easy to plot each optimiser's loss

Expand Down

2 comments on commit 2bf9e60

@mcabbott
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@JuliaRegistrator
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Registration pull request created: JuliaRegistries/General/60870

After the above pull request is merged, it is recommended that a tag is created on this repository for the registered package version.

This will be done automatically if the Julia TagBot GitHub Action is installed, or can be done manually through the github interface, or via:

git tag -a v0.2.5 -m "<description of version>" 2bf9e607b68e24b4e5dd842b2184893f3ff0ecc2
git push origin v0.2.5

Please sign in to comment.