diff --git a/changelog.md b/changelog.md
index 60ffe1c..2265348 100644
--- a/changelog.md
+++ b/changelog.md
@@ -1,3 +1,33 @@
+# v0.8.0 - 09.05.2022
+## Optimization has joined the chat
+Multi-variate optimization and differentiation have been introduced.
+
+- `numericalnim/differentiate` offers `tensorGradient(f, x)`, which calculates the gradient of `f` w.r.t. `x` using finite differences, as well as `tensorJacobian` (returns the transpose of the gradient), `tensorHessian` and `mixedDerivative`. It also provides `checkGradient(f, analyticGrad, x, tol)` to verify that an analytic gradient is correct by comparing it to the finite difference approximation.
+- `numericalnim/optimize` now has several multi-variate optimization methods:
+  - `steepestDescent`
+  - `newton`
+  - `bfgs`
+  - `lbfgs`
+  - They all share a function signature like:
+  ```nim
+  proc bfgs*[U; T: not Tensor](f: proc(x: Tensor[U]): T, x0: Tensor[U], options: OptimOptions[U, StandardOptions] = bfgsOptions[U](), analyticGradient: proc(x: Tensor[U]): Tensor[T] = nil): Tensor[U]
+  ```
+  where `f` is the function to be minimized, `x0` is the starting guess and `options` contains settings such as the tolerance (each method has its own options type, created with for example `lbfgsOptions` or `newtonOptions`). `analyticGradient` can be supplied to avoid finite difference approximations of the derivatives.
+  - Four line search methods are supported and are selected via the `options`: `Armijo, Wolfe, WolfeStrong, NoLineSearch`.
+  - `levmarq`: non-linear least-squares optimizer
+  ```nim
+  proc levmarq*[U; T: not Tensor](f: proc(params: Tensor[U], x: U): T, params0: Tensor[U], xData: Tensor[U], yData: Tensor[T], options: OptimOptions[U, LevmarqOptions[U]] = levmarqOptions[U]()): Tensor[U]
+  ```
+    - `f` is the function you want to fit, where `params` are the parameters to fit and `x` is the value to evaluate the function at.
+    - `params0` is the initial guess for the parameters.
+    - `xData` is a 1D Tensor with the x points and `yData` is a 1D Tensor with the y points.
+    - `options` can be created using `levmarqOptions`.
+    - Returns the final parameters.
+
+
+Note: There are basic tests to ensure these methods converge for simple problems, but they have not been tested on more complex problems and should be considered experimental until more testing has been done. Please try them out, but don't rely on them for anything important for now. Also, the API isn't set in stone yet, so expect that it may change in future versions.
+
+
 # v0.7.1 -25.01.2022
 Add a `nimCI` task for the Nim CI to run now that the tests have external dependencies.
diff --git a/numericalnim.nimble b/numericalnim.nimble
index e544b42..f915240 100644
--- a/numericalnim.nimble
+++ b/numericalnim.nimble
@@ -1,5 +1,5 @@
 # Package Information
-version = "0.7.1"
+version = "0.8.0"
 author = "Hugo Granström"
 description = "A collection of numerical methods written in Nim. Current features: integration, ode, optimization."
license = "MIT" diff --git a/src/numericalnim.nim b/src/numericalnim.nim index 905ec3a..8308749 100644 --- a/src/numericalnim.nim +++ b/src/numericalnim.nim @@ -8,5 +8,7 @@ import numericalnim/optimize export optimize import numericalnim/interpolate export interpolate +import numericalnim/differentiate +export differentiate import ./numericalnim/common/commonTypes export commonTypes diff --git a/src/numericalnim/differentiate.nim b/src/numericalnim/differentiate.nim new file mode 100644 index 0000000..733f018 --- /dev/null +++ b/src/numericalnim/differentiate.nim @@ -0,0 +1,199 @@ +import std/strformat +import arraymancer + +proc diff1dForward*[U, T](f: proc(x: U): T, x0: U, h: U = U(1e-6)): T = + ## Numerically calculate the derivative of f(x) at x0 using a step size h. + ## Uses forward difference which has accuracy O(h) + result = (f(x0 + h) - f(x0)) / h + +proc diff1dBackward*[U, T](f: proc(x: U): T, x0: U, h: U = U(1e-6)): T = + ## Numerically calculate the derivative of f(x) at x0 using a step size h. + ## Uses backward difference which has accuracy O(h) + result = (f(x0) - f(x0 - h)) / h + +proc diff1dCentral*[U, T](f: proc(x: U): T, x0: U, h: U = U(1e-6)): T = + ## Numerically calculate the derivative of f(x) at x0 using a step size h. + ## Uses central difference which has accuracy O(h^2) + result = (f(x0 + h) - f(x0 - h)) / (2*h) + +proc secondDiff1dForward*[U, T](f: proc(x: U): T, x0: U, h: U = U(1e-6)): T = + ## Numerically calculate the second derivative of f(x) at x0 using a step size h. + result = (f(x0 + 2*h) - 2*f(x0 + h) + f(x0)) / (h*h) + +proc secondDiff1dBackward*[U, T](f: proc(x: U): T, x0: U, h: U = U(1e-6)): T = + ## Numerically calculate the second derivative of f(x) at x0 using a step size h. + result = (f(x0) - 2*f(x0 - h) + f(x0 - 2*h)) / (h*h) + +proc secondDiff1dCentral*[U, T](f: proc(x: U): T, x0: U, h: U = U(1e-6)): T = + ## Numerically calculate the second derivative of f(x) at x0 using a step size h. + ## Uses central difference which has accuracy O(h^2) + result = (f(x0 + h) - 2*f(x0) + f(x0 - h)) / (h*h) + +proc tensorGradient*[U; T: not Tensor]( + f: proc(x: Tensor[U]): T, + x0: Tensor[U], + h: U = U(1e-6), + fastMode: bool = false + ): Tensor[T] = + ## Calculates the gradient of f(x) w.r.t vector x at x0 using step size h. + ## By default it uses central difference for approximating the derivatives. This requires two function evaluations per derivative. + ## When fastMode is true it will instead use the forward difference which only uses 1 function evaluation per derivative but is less accurate. + assert x0.rank == 1 # must be a 1d vector + let f0 = f(x0) # make use of this with a `fastMode` switch so we use forward difference instead of central difference? + let xLen = x0.shape[0] + result = newTensor[T](xLen) + var x = x0.clone() + for i in 0 ..< xLen: + x[i] += h + let fPlusH = f(x) + if fastMode: + x[i] -= h # restore to original + result[i] = (fPlusH - f0) / h + else: + x[i] -= 2*h + let fMinusH = f(x) + x[i] += h # restore to original (± float error) + result[i] = (fPlusH - fMinusH) / (2 * h) + +proc tensorGradient*[U, T]( + f: proc(x: Tensor[U]): Tensor[T], + x0: Tensor[U], + h: U = U(1e-6), + fastMode: bool = false + ): Tensor[T] = + ## Calculates the gradient of f(x) w.r.t vector x at x0 using step size h. + ## Every column is the gradient of one component of f. + ## By default it uses central difference for approximating the derivatives. This requires two function evaluations per derivative. 
+ ## When fastMode is true it will instead use the forward difference which only uses 1 function evaluation per derivative but is less accurate. + assert x0.rank == 1 # must be a 1d vector + let f0 = f(x0) # make use of this with a `fastMode` switch so we use forward difference instead of central difference? + assert f0.rank == 1 + let rows = x0.shape[0] + let cols = f0.shape[0] + result = newTensor[T](rows, cols) + var x = x0.clone() + for i in 0 ..< rows: + x[i] += h + let fPlusH = f(x) + if fastMode: + x[i] -= h # restore to original + result[i, _] = ((fPlusH - f0) / h).reshape(1, cols) + else: + x[i] -= 2*h + let fMinusH = f(x) + x[i] += h # restore to original (± float error) + result[i, _] = ((fPlusH - fMinusH) / (2 * h)).reshape(1, cols) + +proc tensorJacobian*[U, T]( + f: proc(x: Tensor[U]): Tensor[T], + x0: Tensor[U], + h: U = U(1e-6), + fastMode: bool = false + ): Tensor[T] = + ## Calculates the jacobian of f(x) w.r.t vector x at x0 using step size h. + ## Every row is the gradient of one component of f. + ## By default it uses central difference for approximating the derivatives. This requires two function evaluations per derivative. + ## When fastMode is true it will instead use the forward difference which only uses 1 function evaluation per derivative but is less accurate. + transpose(tensorGradient(f, x0, h, fastMode)) + +proc mixedDerivative*[U, T](f: proc(x: Tensor[U]): T, x0: var Tensor[U], indices: (int, int), h: U = U(1e-6)): T = + result = 0 + let i = indices[0] + let j = indices[1] + # f(x+h, y+h) + x0[i] += h + x0[j] += h + result += f(x0) + + # f(x+h, y-h) + x0[j] -= 2*h + result -= f(x0) + + # f(x-h, y-h) + x0[i] -= 2*h + result += f(x0) + + # f(x-h, y+h) + x0[j] += 2*h + result -= f(x0) + + # restore x0 + x0[i] += h + x0[j] -= h + + result *= 1 / (4 * h*h) + + +proc tensorHessian*[U; T: not Tensor]( + f: proc(x: Tensor[U]): T, + x0: Tensor[U], + h: U = U(1e-6) + ): Tensor[T] = + assert x0.rank == 1 # must be a 1d vector + let f0 = f(x0) + let xLen = x0.shape[0] + var x = x0.clone() + result = zeros[T](xLen, xLen) + for i in 0 ..< xLen: + for j in i ..< xLen: + let mixed = mixedDerivative(f, x, (i, j), h) + result[i, j] = mixed + result[j, i] = mixed + +proc checkGradient*[U; T: not Tensor](f: proc(x: Tensor[U]): T, fGrad: proc(x: Tensor[U]): Tensor[T], x0: Tensor[U], tol: T): bool = + ## Checks if the provided gradient function `fGrad` gives the same values as numeric gradient. + let numGrad = tensorGradient(f, x0) + let grad = fGrad(x0) + result = true + for i, x in abs(numGrad - grad): + if x > tol: + echo fmt"Gradient at index {i[0]} has error: {x} (tol = {tol})" + result = false + +proc checkGradient*[U; T](f: proc(x: Tensor[U]): Tensor[T], fGrad: proc(x: Tensor[U]): Tensor[T], x0: Tensor[U], tol: T): bool = + ## Checks if the provided gradient function `fGrad` gives the same values as numeric gradient. + let numGrad = tensorGradient(f, x0) + let grad = fGrad(x0) + result = true + for i, x in abs(numGrad - grad): + if x > tol: + echo fmt"Gradient at index {i[0]} has error: {x} (tol = {tol})" + result = false + + + +when isMainModule: + import std/math + import benchy + proc f1(x: Tensor[float]): Tensor[float] = + x.sum(0) + let x0 = ones[float](10) + echo tensorGradient(f1, x0, 1e-6) + echo tensorGradient(f1, x0, 1e-6, true) + echo tensorJacobian(f1, x0, 1e-6) + + proc f2(x: Tensor[float]): float = + sum(x) + echo tensorGradient(f2, x0, 1e-6) + echo tensorGradient(f2, x0, 1e-6, true) + + let N = 1000 + timeIt "slow mode": + for i in 0 .. 
N: + keep tensorGradient(f1, x0, 1e-6, false) + timeIt "fast mode": + for i in 0 .. N: + keep tensorGradient(f1, x0, 1e-6, true) + timeIt "slow mode float": + for i in 0 .. N: + keep tensorGradient(f2, x0, 1e-6, false) + timeIt "fast mode float": + for i in 0 .. N: + keep tensorGradient(f2, x0, 1e-6, true) + timeIt "jacobian slow": + for i in 0 .. N: + keep tensorJacobian(f1, x0, 1e-6, false) + + + + diff --git a/src/numericalnim/optimize.nim b/src/numericalnim/optimize.nim index af03c93..18a0963 100644 --- a/src/numericalnim/optimize.nim +++ b/src/numericalnim/optimize.nim @@ -1,7 +1,6 @@ -import strformat +import std/[strformat, sequtils, math, deques] import arraymancer -import sequtils -import math +import ./differentiate proc steepest_descent*(deriv: proc(x: float64): float64, start: float64, gamma: float64 = 0.01, precision: float64 = 1e-5, max_iters: Natural = 1000):float64 {.inline.} = ## Gradient descent optimization algorithm for finding local minimums of a function with derivative 'deriv' @@ -120,9 +119,494 @@ proc secant*(f: proc(x: float64): float64, start: array[2, float64], precision: raise newException(ArithmeticError, "Maximum iterations for Secant method exceeded") return xCurrent +############################## +## Multidimensional methods ## +############################## + +type LineSearchCriterion = enum + Armijo, Wolfe, WolfeStrong, NoLineSearch + +type + OptimOptions*[U, AO] = object + tol*, alpha*: U + fastMode*: bool + maxIterations*: int + lineSearchCriterion*: LineSearchCriterion + algoOptions*: AO + StandardOptions* = object + LevMarqOptions*[U] = object + lambda0*: U + LBFGSOptions*[U] = object + savedIterations*: int + +proc optimOptions*[U](tol: U = U(1e-6), alpha: U = U(1), lambda0: U = U(1), fastMode: bool = false, maxIterations: int = 10000, lineSearchCriterion: LineSearchCriterion = NoLineSearch): OptimOptions[U, StandardOptions] = + result.tol = tol + result.alpha = alpha + result.lambda0 = lambda0 + result.fastMode = fastMode + result.maxIterations = maxIterations + result.lineSearchCriterion = lineSearchCriterion + +proc steepestDescentOptions*[U](tol: U = U(1e-6), alpha: U = U(0.001), fastMode: bool = false, maxIterations: int = 10000, lineSearchCriterion: LineSearchCriterion = NoLineSearch): OptimOptions[U, StandardOptions] = + result.tol = tol + result.alpha = alpha + result.fastMode = fastMode + result.maxIterations = maxIterations + result.lineSearchCriterion = lineSearchCriterion + +proc newtonOptions*[U](tol: U = U(1e-6), alpha: U = U(1), fastMode: bool = false, maxIterations: int = 10000, lineSearchCriterion: LineSearchCriterion = NoLineSearch): OptimOptions[U, StandardOptions] = + result.tol = tol + result.alpha = alpha + result.fastMode = fastMode + result.maxIterations = maxIterations + result.lineSearchCriterion = lineSearchCriterion + +proc bfgsOptions*[U](tol: U = U(1e-6), alpha: U = U(1), fastMode: bool = false, maxIterations: int = 10000, lineSearchCriterion: LineSearchCriterion = NoLineSearch): OptimOptions[U, StandardOptions] = + result.tol = tol + result.alpha = alpha + result.fastMode = fastMode + result.maxIterations = maxIterations + result.lineSearchCriterion = lineSearchCriterion + +proc lbfgsOptions*[U](savedIterations: int = 10, tol: U = U(1e-6), alpha: U = U(1), fastMode: bool = false, maxIterations: int = 10000, lineSearchCriterion: LineSearchCriterion = NoLineSearch): OptimOptions[U, LBFGSOptions[U]] = + result.tol = tol + result.alpha = alpha + result.fastMode = fastMode + result.maxIterations = maxIterations + 
result.lineSearchCriterion = lineSearchCriterion + result.algoOptions.savedIterations = savedIterations + +proc levmarqOptions*[U](lambda0: U = U(1), tol: U = U(1e-6), alpha: U = U(1), fastMode: bool = false, maxIterations: int = 10000, lineSearchCriterion: LineSearchCriterion = NoLineSearch): OptimOptions[U, LevMarqOptions[U]] = + result.tol = tol + result.alpha = alpha + result.fastMode = fastMode + result.maxIterations = maxIterations + result.lineSearchCriterion = lineSearchCriterion + result.algoOptions.lambda0 = lambda0 + + + + +proc vectorNorm*[T](v: Tensor[T]): T = + ## Calculates the norm of the vector, ie the sqrt(Σ vᵢ²) + assert v.rank == 1, "v must be a 1d vector!" + result = sqrt(v.dot(v)) + +proc eye[T](n: int): Tensor[T] = + result = zeros[T](n, n) + for i in 0 ..< n: + result[i, i] = 1 + +proc line_search*[U, T](alpha: var U, p: Tensor[T], x0: Tensor[U], f: proc(x: Tensor[U]): T, criterion: LineSearchCriterion, fastMode: bool = false) = + # note: set initial alpha for the methods as 1 / sqrt(dot(grad, grad)) so first step has length 1. + if criterion == NoLineSearch: + return + var gradient = tensorGradient(f, x0, fastMode=fastMode) + let dirDerivInit = dot(gradient, p) + + if 0 < dirDerivInit: + # p is pointing uphill, use whatever alpha we currently have. + return + + let fInit = f(x0) + var counter = 0 + alpha = 1 + while counter < 20: + let x = x0 + alpha * p + let fx = f(x) + gradient = tensorGradient(f, x, fastMode=fastMode) + counter += 1 + + if fx > fInit + 1e-4*alpha * dirDerivInit: # c1 = 1e-4 multiply as well? Doesn't seem to work + alpha *= 0.5 + else: + if criterion == Armijo: + return + let dirDeriv = dot(gradient, p) + if dirDeriv < 0.9 * dirDerivInit: + alpha *= 2.1 + else: + if criterion == Wolfe: + return + if dirDeriv > -0.9*dirDerivInit: + alpha *= 0.5 + else: + return + if alpha < 1e-3: + alpha = 1e-3 + return + elif alpha > 1e2: + alpha = 1e2 + return + +template analyticOrNumericGradient(analytic, f, x, options: untyped): untyped = + if analytic.isNil: + tensorGradient(f, x, fastMode=options.fastMode) + else: + analytic(x) + +proc steepestDescent*[U; T: not Tensor](f: proc(x: Tensor[U]): T, x0: Tensor[U], options: OptimOptions[U, StandardOptions] = steepestDescentOptions[U](), analyticGradient: proc(x: Tensor[U]): Tensor[T] = nil): Tensor[U] = + ## Minimize scalar-valued function f. + var alpha = options.alpha + var x = x0.clone() + var fNorm = abs(f(x0)) + var gradient = analyticOrNumericGradient(analyticGradient, f, x0, options) #tensorGradient(f, x0, fastMode=options.fastMode) + var gradNorm = vectorNorm(gradient) + var iters: int + while gradNorm > options.tol*(1 + fNorm) and iters < 10000: + let p = -gradient + line_search(alpha, p, x, f, options.lineSearchCriterion, options.fastMode) + x += alpha * p + let fx = f(x) + fNorm = abs(fx) + gradient = analyticOrNumericGradient(analyticGradient, f, x, options) #tensorGradient(f, x, fastMode=options.fastMode) + gradNorm = vectorNorm(gradient) + iters += 1 + if iters >= 10000: + discard "Limit of 10000 iterations reached!" + #echo iters, " iterations done!" 
+ result = x + +proc newton*[U; T: not Tensor](f: proc(x: Tensor[U]): T, x0: Tensor[U], options: OptimOptions[U, StandardOptions] = newtonOptions[U](), analyticGradient: proc(x: Tensor[U]): Tensor[T] = nil): Tensor[U] = + var alpha = options.alpha + var x = x0.clone() + var fNorm = abs(f(x)) + var gradient = analyticOrNumericGradient(analyticGradient, f, x0, options) + var gradNorm = vectorNorm(gradient) + var hessian = tensorHessian(f, x) + var iters: int + while gradNorm > options.tol*(1 + fNorm) and iters < 10000: + let p = -solve(hessian, gradient) + line_search(alpha, p, x, f, options.lineSearchCriterion, options.fastMode) + x += alpha * p + let fx = f(x) + fNorm = abs(fx) + gradient = analyticOrNumericGradient(analyticGradient, f, x, options) + gradNorm = vectorNorm(gradient) + hessian = tensorHessian(f, x) + iters += 1 + if iters >= 10000: + discard "Limit of 10000 iterations reached!" + #echo iters, " iterations done!" + result = x + +proc bfgs_old*[U; T: not Tensor](f: proc(x: Tensor[U]): T, x0: Tensor[U], alpha: U = U(1), tol: U = U(1e-6), fastMode: bool = false, analyticGradient: proc(x: Tensor[U]): Tensor[T] = nil): Tensor[U] = + var x = x0.clone() + let xLen = x.shape[0] + var fNorm = abs(f(x)) + var gradient = 0.01*tensorGradient(f, x, fastMode=fastMode) + var gradNorm = vectorNorm(gradient) + var hessianB = eye[T](xLen) # inverse of the approximated hessian + var iters: int + while gradNorm > tol*(1 + fNorm) and iters < 10000: + #echo "Hessian iter ", iters, ": ", hessianB + let p = -hessianB * gradient.reshape(xLen, 1) + #echo "p iter ", iters, ": ", p + x += alpha * p + let newGradient = tensorGradient(f, x, fastMode=fastMode) + let sk = alpha * p.reshape(xLen, 1) + #gemm(1.0, hessianB, hessianB, 0.0, newGradient) + let yk = (newGradient - gradient).reshape(xLen, 1) + let sk_yk_dot = dot(sk.squeeze, yk.squeeze) + let prefactor1 = (sk_yk_dot + dot(yk.squeeze, squeeze(hessianB * yk))) / (sk_yk_dot * sk_yk_dot) + let prefactor2 = 1 / sk_yk_dot + let m1 = (sk * sk.transpose) + let m2_1 = (hessianB * yk) * sk.transpose + let m2_2 = sk * (yk.transpose * hessianB) + #echo "prefactor2: ", prefactor2 + hessianB += prefactor1 * m1 + #echo "hessian2: ", hessianB + #echo "hessian: ", hessianB + #echo "yk: ", yk + hessianB -= prefactor2 * m2_1 + #echo "checkpoint1: ", (hessianB * yk) + #echo "hessian2.5: ", hessianB + hessianB -= prefactor2 * m2_2 + #echo "hessian3: ", hessianB + + gradient = newGradient + let fx = f(x) + fNorm = abs(fx) + gradNorm = vectorNorm(gradient) + iters += 1 + if iters >= 10000: + discard "Limit of 10000 iterations reached!" + #echo iters, " iterations done!" + result = x + +proc bfgs*[U; T: not Tensor](f: proc(x: Tensor[U]): T, x0: Tensor[U], options: OptimOptions[U, StandardOptions] = bfgsOptions[U](), analyticGradient: proc(x: Tensor[U]): Tensor[T] = nil): Tensor[U] = + # Use gemm and gemv with preallocated Tensors and setting beta = 0 + var alpha = options.alpha + var x = x0.clone() + let xLen = x.shape[0] + var fNorm = abs(f(x)) + var gradient = 0.01*analyticOrNumericGradient(analyticGradient, f, x0, options) + var gradNorm = vectorNorm(gradient) + var hessianB = eye[T](xLen) # inverse of the approximated hessian + var p = newTensor[U](xLen) + var tempVector1 = newTensor[U](xLen, 1) + var tempVector2 = newTensor[U](1, xLen) + var iters: int + while gradNorm > options.tol*(1 + fNorm) and iters < 10000: + # We are using hessianB in calculating it so we are modifying it prior to its use! 
+ + + #echo "Hessian iter ", iters, ": ", hessianB + #let p = -hessianB * gradient.reshape(xLen, 1) + gemv(-1.0, hessianB, gradient.reshape(xLen, 1), 0.0, p) + #echo "p iter ", iters, ": ", p + #echo "x iter ", iters, ": ", x + #echo "gradient iter ", iters, ": ", gradient + line_search(alpha, p, x, f, options.lineSearchCriterion, options.fastMode) + x += alpha * p + let newGradient = analyticOrNumericGradient(analyticGradient, f, x, options) #tensorGradient(f, x, fastMode=options.fastMode) + let sk = alpha * p.reshape(xLen, 1) + + let yk = (newGradient - gradient).reshape(xLen, 1) + let sk_yk_dot = dot(sk.squeeze, yk.squeeze) + # gemm(1.0, hessianB, hessianB, 0.0, newGradient) + # Do the calculation in steps, minimizing allocations + # sk * sk.transpose is matrix × matrix + # how to deal with the vector.T × vector = Matrix? Use gemm? gemm() can be used as += if beta is set to 1.0 + #echo "aim: ", hessianB + (sk_yk_dot + dot(yk.squeeze, squeeze(hessianB * yk))) / (sk_yk_dot * sk_yk_dot) * (sk * sk.transpose) - ((hessianB * yk) * sk.transpose + sk * (yk.transpose * hessianB)) / sk_yk_dot + #echo "real Hessian: ", hessianB + (sk_yk_dot + dot(yk.squeeze, squeeze(hessianB * yk))) / (sk_yk_dot * sk_yk_dot) * (sk * sk.transpose) + let prefactor1 = (sk_yk_dot + dot(yk.squeeze, squeeze(hessianB * yk))) / (sk_yk_dot * sk_yk_dot) + let prefactor2 = 1 / sk_yk_dot + gemv(1.0, hessianB, yk, 0.0, tempVector1) # temp1 = hessianB * yk + gemm(1.0, yk.transpose, hessianB, 0.0, tempVector2) # temp2 = yk.transpose * hessianB + + gemm(prefactor1, sk, sk.transpose, 1.0, hessianB) # hessianB += prefactor1 * sk * sk.transpose + + gemm(-prefactor2, tempVector1, sk.transpose, 1.0, hessianB) # hessianB -= prefactor2 * temp1 * sk.transpose + + gemm(-prefactor2, sk, tempVector2, 1.0, hessianB) # hessianB -= prefactor2 * sk * temp2 + #echo "hessian2: ", hessianB # This is correct + + # somewhere down here the error occurs!↓ + + # reuse vector p: + #gemv(1.0, hessianB, yk, 0.0, tempVector1) # temp1 = hessianB * yk + #echo "checkpoint1: ", tempVector1 # this is incorrect!!! + + # + # + # Rewrite with transposes as gemv instead! + # (A * B)^T = B^T * A^T → yk.transpose * hessianB = transpose(hessianB.transpose * yk) + # + + #gemm(1.0, yk.transpose, hessianB, 0.0, p) + #gemv(1.0, hessianB.transpose, yk, 0.0, tempVector) # p = yk.transpose * hessianB + #tempVector = tempVector.transpose + + + #echo "hessian3: ", hessianB + + #hessianB += (sk_yk_dot + dot(yk.squeeze, squeeze(hessianB * yk))) / (sk_yk_dot * sk_yk_dot) * (sk * sk.transpose) - ((hessianB * yk) * sk.transpose + sk * (yk.transpose * hessianB)) / sk_yk_dot + #tempVector = tempVector.transpose # reverse it back to column vector + + gradient = newGradient + let fx = f(x) + fNorm = abs(fx) + gradNorm = vectorNorm(gradient) + iters += 1 + if iters >= 10000: + discard "Limit of 10000 iterations reached!" + #echo iters, " iterations done!" 
+ result = x + +proc lbfgs*[U; T: not Tensor](f: proc(x: Tensor[U]): T, x0: Tensor[U], m: int = 10, options: OptimOptions[U, LBFGSOptions[U]] = lbfgsOptions[U](), analyticGradient: proc(x: Tensor[U]): Tensor[T] = nil): Tensor[U] = + var alpha = options.alpha + var x = x0.clone() + let xLen = x.shape[0] + var fNorm = abs(f(x)) + var gradient = 0.01*analyticOrNumericGradient(analyticGradient, f, x0, options) + var gradNorm = vectorNorm(gradient) + var iters: int + #let m = 10 # number of past iterations to save + var sk_queue = initDeque[Tensor[U]](m) + var yk_queue = initDeque[Tensor[T]](m) + # the problem is the first iteration as the gradient is huge and no adjustments are made + while gradNorm > options.tol*(1 + fNorm) and iters < 10000: + #echo "grad: ", gradient + #echo "x: ", x + var q = gradient.clone() + # we want to loop from latest inserted to oldest + # → we insert at the beginning and pop at the end + var alphas: seq[U] + for i in 0 ..< sk_queue.len: + let rho_i = 1 / dot(sk_queue[i], yk_queue[i]) + let alpha_i = rho_i * dot(sk_queue[i], q) + q -= alpha_i * yk_queue[i] + alphas.add alpha_i + let gamma = if sk_queue.len == 0: 1.0 else: dot(sk_queue[0], yk_queue[0]) / dot(yk_queue[0], yk_queue[0]) + #echo gamma + var z = gamma * q + for i in countdown(sk_queue.len - 1, 0): + let rho_i = 1 / dot(sk_queue[i], yk_queue[i]) + let beta_i = rho_i * dot(yk_queue[i], z) + let alpha_i = alphas[i] + z += sk_queue[i] * (alpha_i - beta_i) + z = -z + let p = z + #echo "q: ", q + line_search(alpha, p, x, f, options.lineSearchCriterion, options.fastMode) + x += alpha * p + sk_queue.addFirst alpha*p + let newGradient = analyticOrNumericGradient(analyticGradient, f, x, options) + let yk = newGradient - gradient + yk_queue.addFirst yk + gradient = newGradient + let fx = f(x) + fNorm = abs(fx) + gradNorm = vectorNorm(gradient) + if sk_queue.len > m: discard sk_queue.popLast + if yk_queue.len > m: discard yk_queue.popLast + iters += 1 + + if iters >= 10000: + discard "Limit of 10000 iterations reached!" + #echo iters, " iterations done!" 
+ result = x + +proc levmarq*[U; T: not Tensor](f: proc(params: Tensor[U], x: U): T, params0: Tensor[U], xData: Tensor[U], yData: Tensor[T], options: OptimOptions[U, LevmarqOptions[U]] = levmarqOptions[U]()): Tensor[U] = + assert xData.rank == 1 + assert yData.rank == 1 + assert params0.rank == 1 + let xLen = xData.shape[0] + let yLen = yData.shape[0] + let paramsLen = params0.shape[0] + assert xLen == yLen + + let residualFunc = # proc that returns the residual vector + proc (params: Tensor[U]): Tensor[T] = + result = map2_inline(xData, yData): + f(params, x) - y + + let errorFunc = # proc that returns the scalar error + proc (params: Tensor[U]): T = + let res = residualFunc(params) + result = dot(res, res) + + var lambdaCoeff = options.algoOptions.lambda0 + + var params = params0.clone() + var gradient = tensorGradient(residualFunc, params, fastMode=options.fastMode) + var residuals = residualFunc(params) + var resNorm = vectorNorm(residuals) + var gradNorm = vectorNorm(squeeze(gradient * residuals.reshape(xLen, 1))) + var iters: int + let eyeNN = eye[T](paramsLen) + while gradNorm > options.tol*(1 + resNorm) and iters < 10000: + let rhs = -gradient * residuals.reshape(xLen, 1) + let lhs = gradient * gradient.transpose + lambdaCoeff * eyeNN + let p = solve(lhs, rhs) + params += p * options.alpha + gradient = tensorGradient(residualFunc, params, fastMode=options.fastMode) + residuals = residualFunc(params) + let newGradNorm = vectorNorm(squeeze(gradient * residuals.reshape(xLen, 1))) + if newGradNorm / gradNorm < 0.9: # we have improved, decrease lambda → more Gauss-Newton + lambdaCoeff = max(lambdaCoeff / 3, 1e-4) + elif newGradNorm / gradNorm > 1.2: # we have done worse than last ste, increase lambda → more Steepest descent + lambdaCoeff = min(lambdaCoeff * 2, 20) + # else: don't change anything + gradNorm = newGradNorm + resNorm = vectorNorm(residuals) + iters += 1 + if iters == 10000: + echo "levmarq reached maximum number of iterations!" + result = params + + + +when isMainModule: + import benchy + # Steepest descent: + proc f1(x: Tensor[float]): float = + # result = -20*exp(-0.2*sqrt(0.5 * (x[0]*x[0] + x[1]*x[1]))) - exp(0.5*(cos(2*PI*x[0]) + cos(2*PI*x[1]))) + E + 20 # Ackley + result = (1 - x[0])^2 + 100*(x[1] - x[0]^2)^2 + if x.shape[0] > 2: + for ix in x[2 .. 
_]: + result += (ix - 1)^2 + #let x0 = [-0.5, 2.0].toTensor + let x0 = ones[float](100) * -1 + #[ let sol1 = steepestDescent(f1, x0, tol=1e-8, alpha=0.001, fastMode=true) + echo sol1 + echo f1(sol1) + echo "Newton: ", newton(f1, x0, tol=1e-8, fastMode=false) + echo "Newton: ", newton(f1, x0, tol=1e-8, fastMode=true) + echo "BFGS: ", bfgs(f1, x0, tol=1e-8, fastMode=false) + echo "BFGS: ", bfgs(f1, x0, tol=1e-8, fastMode=true) + echo "LBFGS:", lbfgs(f1, x0, tol=1e-8, fastMode=false) + echo "BFGS: ", bfgs(f1, x0, tol=1e-8, fastMode=false) + echo "BFGS opt: ", bfgs_optimized(f1, x0, tol=1e-8, fastMode=false) + echo "BFGS: ", bfgs(f1, x0, tol=1e-8, fastMode=true) + echo "BFGS opt: ", bfgs_optimized(f1, x0, tol=1e-8, fastMode=true) ]# + + #[ echo bfgs_optimized(f1, x0, tol=1e-8, alpha=0.001, fastMode=false, criterion=None) + echo bfgs_optimized(f1, x0, tol=1e-8, alpha=0.001, fastMode=false, criterion=Armijo) + echo bfgs_optimized(f1, x0, tol=1e-8, alpha=0.001, fastMode=false, criterion=Wolfe) + echo bfgs_optimized(f1, x0, tol=1e-8, alpha=0.001, fastMode=false, criterion=WolfeStrong) + echo lbfgs(f1, x0, tol=1e-8, alpha=0.001, fastMode=false, criterion=None) + echo lbfgs(f1, x0, tol=1e-8, alpha=0.001, fastMode=false, criterion=Armijo) + echo lbfgs(f1, x0, tol=1e-8, alpha=0.001, fastMode=false, criterion=Wolfe) + echo lbfgs(f1, x0, tol=1e-8, alpha=0.001, fastMode=false, criterion=WolfeStrong) ]# + + #[ timeIt "steepest slow mode None": + keep bfgs_optimized(f1, x0, tol=1e-8, alpha=1, fastMode=false, criterion=None) + + timeIt "steepest slow mode Armijo": + keep bfgs_optimized(f1, x0, tol=1e-8, alpha=1, fastMode=false, criterion=Armijo) + timeIt "steepest slow mode Wolfe": + keep bfgs_optimized(f1, x0, tol=1e-8, alpha=1, fastMode=false, criterion=Wolfe) + timeIt "steepest slow mode WolfeStrong": + keep bfgs_optimized(f1, x0, tol=1e-8, alpha=1, fastMode=false, criterion=WolfeStrong) + + timeIt "steepest slow mode None": + keep lbfgs(f1, x0, tol=1e-8, alpha=1, fastMode=false, criterion=None) + + timeIt "steepest slow mode Armijo": + keep lbfgs(f1, x0, tol=1e-8, alpha=1, fastMode=false, criterion=Armijo) + + timeIt "steepest slow mode Wolfe": + keep lbfgs(f1, x0, tol=1e-8, alpha=1, fastMode=false, criterion=Wolfe) + + timeIt "steepest slow mode WolfeStrong": + keep lbfgs(f1, x0, tol=1e-8, alpha=1, fastMode=false, criterion=WolfeStrong) ]# +#[ timeIt "newton slow mode": + keep newton(f1, x0, tol=1e-8, fastMode=false) + timeIt "newton fast mode": + keep newton(f1, x0, tol=1e-8, fastMode=true) + timeIt "bfgs slow mode": + keep bfgs(f1, x0, tol=1e-8, fastMode=false) + timeIt "bfgs fast mode": + keep bfgs(f1, x0, tol=1e-8, fastMode=true) + timeIt "lbfgs slow mode": + keep lbfgs(f1, x0, tol=1e-8, fastMode=false) + timeIt "lbfgs fast mode": + keep lbfgs(f1, x0, tol=1e-8, fastMode=true) + timeIt "optimized bfgs slow mode": + keep bfgs_optimized(f1, x0, tol=1e-8, fastMode=false) + timeIt "optimized bfgs fast mode": + keep bfgs_optimized(f1, x0, tol=1e-8, fastMode=true) + timeIt "steepest fast mode": + keep steepestDescent(f1, x0, tol=1e-8, alpha=0.001, fastMode=true) ]# + + + # Lev-Marq: +#[ proc fFit(params: Tensor[float], x: float): float = + params[0] + params[1] * x + params[2] * x*x + + let xData = arraymancer.linspace(0, 10, 100) + let yData = 1.5 +. xData * 6.28 + xData *. 
xData * -5.79 + randomNormalTensor[float](xData.shape[0], 0.0, 0.1) + let params0 = [0.0, 0.0, 0.0].toTensor + echo levmarq(fFit, params0, xData, yData) + timeIt "slow mode": + keep levmarq(fFit, params0, xData, yData, fastMode=false) + timeIt "fast mode": + keep levmarq(fFit, params0, xData, yData, fastMode=true) ]# diff --git a/tests/test_differentiate.nim b/tests/test_differentiate.nim new file mode 100644 index 0000000..e150313 --- /dev/null +++ b/tests/test_differentiate.nim @@ -0,0 +1,155 @@ +import std/[unittest, math] +import numericalnim +import arraymancer + +proc f_1d(x: float): float = 2 * sin(x) * cos(x) # same as sin(2*x) +proc exact_deriv_1d(x: float): float = 2*cos(2*x) +proc exact_secondderiv_1d(x: float): float = -4*sin(2*x) + +suite "1D numeric differentiation": + test "Forward difference": + for x in numericalnim.linspace(0, 10, 100): + let numDiff = diff1dForward(f_1d, x) + let exact = exact_deriv_1d(x) + check abs(numDiff - exact) < 3e-6 + + test "Backward difference": + for x in numericalnim.linspace(0, 10, 100): + let numDiff = diff1dBackward(f_1d, x) + let exact = exact_deriv_1d(x) + check abs(numDiff - exact) < 3e-6 + + test "Central difference": + for x in numericalnim.linspace(0, 10, 100): + let numDiff = diff1dCentral(f_1d, x) + let exact = exact_deriv_1d(x) + check abs(numDiff - exact) < 2e-9 + + test "Forward second difference": + for x in numericalnim.linspace(0, 10, 100): + let numDiff = secondDiff1dForward(f_1d, x) + let exact = exact_secondderiv_1d(x) + check abs(numDiff - exact) < 4e-3 + + test "Backward second difference": + for x in numericalnim.linspace(0, 10, 100): + let numDiff = secondDiff1dBackward(f_1d, x) + let exact = exact_secondderiv_1d(x) + check abs(numDiff - exact) < 4e-3 + + test "Central second difference": + for x in numericalnim.linspace(0, 10, 100): + let numDiff = secondDiff1dCentral(f_1d, x) + let exact = exact_secondderiv_1d(x) + check abs(numDiff - exact) < 4e-4 + +proc fScalar(x: Tensor[float]): float = + # This will be a function of three variables + # f(x0, x1, x2) = x0^2 + 2 * x0 * x1 + sin(x2) + result = x[0]*x[0] + 2 * x[0] * x[1] + sin(x[2]) + +proc scalarGradient(x: Tensor[float]): Tensor[float] = + # Gradient is (2*x0 + 2*x1, 2*x0, cos(x2)) + result = zeros_like(x) + result[0] = 2*x[0] + 2*x[1] + result[1] = 2*x[0] + result[2] = cos(x[2]) + +proc scalarHessian(x: Tensor[float]): Tensor[float] = + result = zeros[float](3, 3) + result[0,0] = 2 + result[0,1] = 2 + result[1,0] = 2 + result[0,2] = 0 + result[2,0] = 0 + result[1,1] = 0 + result[1,2] = 0 + result[2,1] = 0 + result[2,2] = -sin(x[2]) + +proc fMultidim(x: Tensor[float]): Tensor[float] = + # Function will have 3 inputs and 2 outputs (important that they aren't the same for testing!) 
+ # f(x0, x1, x2) = (x0*x1*x2^2, x1*sin(2*x2)) + result = zeros[float](2) + result[0] = x[0]*x[1]*x[2]*x[2] + result[1] = x[1] * sin(2*x[2]) + +proc multidimGradient(x: Tensor[float]): Tensor[float] = + # The gradient (Jacobian transposed) is: + # x1*x2^2 0 + # x0*x2^2 sin(2*x2) + # 2*x0*x1*x2 2*x1*cos(2*x2) + result = zeros[float](3, 2) + result[0, 0] = x[1]*x[2]*x[2] + result[0, 1] = 0 + result[1, 0] = x[0]*x[2]*x[2] + result[1, 1] = sin(2*x[2]) + result[2, 0] = 2*x[0]*x[1]*x[2] + result[2, 1] = 2*x[1]*cos(2*x[2]) + + +suite "Multi dimensional numeric gradients": + test "Scalar valued function of 3 variables": + for x in numericalnim.linspace(0, 1, 10): + for y in numericalnim.linspace(0, 1, 10): + for z in numericalnim.linspace(0, 1, 10): + let x0 = [x, y, z].toTensor + let numGrad = tensorGradient(fScalar, x0) + let exact = scalarGradient(x0) + for err in abs(numGrad - exact): + check err < 5e-10 + + test "Scalar valued function of 3 variables (fast mode)": + for x in numericalnim.linspace(0, 1, 10): + for y in numericalnim.linspace(0, 1, 10): + for z in numericalnim.linspace(0, 1, 10): + let x0 = [x, y, z].toTensor + let numGrad = tensorGradient(fScalar, x0, fastMode=true) + let exact = scalarGradient(x0) + for err in abs(numGrad - exact): + check err < 5e-6 + + test "Multi-dimensional function of 3 variables": + for x in numericalnim.linspace(0, 1, 10): + for y in numericalnim.linspace(0, 1, 10): + for z in numericalnim.linspace(0, 1, 10): + let x0 = [x, y, z].toTensor + let numGrad = tensorGradient(fMultidim, x0) + let exact = multidimGradient(x0) + for err in abs(numGrad - exact): + check err < 1e-10 + + test "Multi-dimensional function of 3 variables (fast mode)": + for x in numericalnim.linspace(0, 1, 10): + for y in numericalnim.linspace(0, 1, 10): + for z in numericalnim.linspace(0, 1, 10): + let x0 = [x, y, z].toTensor + let numGrad = tensorGradient(fMultidim, x0, fastMode=true) + let exact = multidimGradient(x0) + for err in abs(numGrad - exact): + check err < 2e-6 + + + test "Jacobian": + for x in numericalnim.linspace(0, 1, 10): + for y in numericalnim.linspace(0, 1, 10): + for z in numericalnim.linspace(0, 1, 10): + let x0 = [x, y, z].toTensor + let numJacobian = tensorJacobian(fMultidim, x0) + let exact = multidimGradient(x0).transpose + for err in abs(numJacobian - exact): + check err < 1e-10 + + test "checkGradient": + check checkGradient(fScalar, scalarGradient, [0.5, 0.5, 0.5].toTensor, 6e-11) + check checkGradient(fMultidim, multidimGradient, [0.5, 0.5, 0.5].toTensor, 4e-12) + + test "Hessian scalar valued function": + for x in numericalnim.linspace(0, 1, 10): + for y in numericalnim.linspace(0, 1, 10): + for z in numericalnim.linspace(0, 1, 10): + let x0 = [x, y, z].toTensor + let numHessian = tensorHessian(fScalar, x0) + let exact = scalarHessian(x0) + for err in abs(numHessian - exact): + check err < 3e-4 diff --git a/tests/test_optimize.nim b/tests/test_optimize.nim index fd03379..329cdfe 100644 --- a/tests/test_optimize.nim +++ b/tests/test_optimize.nim @@ -1,61 +1,140 @@ -import unittest, math, sequtils, arraymancer +import std/[unittest, math, sequtils, random] +import arraymancer import numericalnim -test "steepest_descent func": - proc df(x: float): float = 4 * x^3 - 9.0 * x^2 - let start = 6.0 - let gamma = 0.01 - let precision = 0.00001 - let max_iters = 10000 - let correct = 2.24996 - let value = steepest_descent(df, start, gamma, precision, max_iters) - check isClose(value, correct, tol = 1e-5) - -test "steepest_descent func starting at zero": - proc 
df(x: float): float = 4 * x^3 - 9.0 * x^2 + 4 - let start = 0.0 - let correct = -0.59301 - let value = steepest_descent(df, start) - check isClose(value, correct, tol = 1e-5) - -#[ -test "conjugate_gradient func": - var A = toSeq([4.0, 1.0, 1.0, 3.0]).toTensor.reshape(2,2).astype(float64) - var x = toSeq([2.0, 1.0]).toTensor.reshape(2,1) - var b = toSeq([1.0,2.0]).toTensor.reshape(2,1) - let tol = 0.001 - let correct = toSeq([0.090909, 0.636363]).toTensor.reshape(2,1).astype(float64) - - let value = conjugate_gradient(A, b, x, tol) - check isClose(value, correct, tol = 1e-5) -]# -test "Newtons 1 dimension func": - proc f(x:float64): float64 = (1.0 / 3.0) * x ^ 3 - 2 * x ^ 2 + 3 * x - proc df(x:float64): float64 = x ^ 2 - 4 * x + 3 - let x = 0.5 - let correct = 0.0 - let value = newtons(f, df, x, 0.000001, 1000) - check isClose(value, correct, tol=1e-5) - -test "Newtons 1 dimension func default args": - proc f(x:float64): float64 = (1.0 / 3.0) * x ^ 3 - 2 * x ^ 2 + 3 * x - proc df(x:float64): float64 = x ^ 2 - 4 * x + 3 - let x = 0.5 - let correct = 0.0 - let value = newtons(f, df, x) - check isClose(value, correct, tol=1e-5) - -test "Newtons unable to find a root": - proc bad_f(x:float64): float64 = pow(E, x) + 1 - proc bad_df(x:float64): float64 = pow(E, x) - expect(ArithmeticError): - discard newtons(bad_f, bad_df, 0, 0.000001, 1000) - -test "Secant 1 dimension func": - proc f(x:float64): float64 = (1.0 / 3.0) * x ^ 3 - 2 * x ^ 2 + 3 * x - let x = [1.0, 0.5] - let correct = 0.0 - let value = secant(f, x, 1e-5, 10) - check isClose(value, correct, tol=1e-5) +suite "1D": + test "steepest_descent func": + proc df(x: float): float = 4 * x^3 - 9.0 * x^2 + let start = 6.0 + let gamma = 0.01 + let precision = 0.00001 + let max_iters = 10000 + let correct = 2.24996 + let value = steepest_descent(df, start, gamma, precision, max_iters) + check isClose(value, correct, tol = 1e-5) + + test "steepest_descent func starting at zero": + proc df(x: float): float = 4 * x^3 - 9.0 * x^2 + 4 + let start = 0.0 + let correct = -0.59301 + let value = steepest_descent(df, start) + check isClose(value, correct, tol = 1e-5) + + #[ + test "conjugate_gradient func": + var A = toSeq([4.0, 1.0, 1.0, 3.0]).toTensor.reshape(2,2).astype(float64) + var x = toSeq([2.0, 1.0]).toTensor.reshape(2,1) + var b = toSeq([1.0,2.0]).toTensor.reshape(2,1) + let tol = 0.001 + let correct = toSeq([0.090909, 0.636363]).toTensor.reshape(2,1).astype(float64) + + let value = conjugate_gradient(A, b, x, tol) + check isClose(value, correct, tol = 1e-5) + ]# + test "Newtons 1 dimension func": + proc f(x:float64): float64 = (1.0 / 3.0) * x ^ 3 - 2 * x ^ 2 + 3 * x + proc df(x:float64): float64 = x ^ 2 - 4 * x + 3 + let x = 0.5 + let correct = 0.0 + let value = newtons(f, df, x, 0.000001, 1000) + check isClose(value, correct, tol=1e-5) + + test "Newtons 1 dimension func default args": + proc f(x:float64): float64 = (1.0 / 3.0) * x ^ 3 - 2 * x ^ 2 + 3 * x + proc df(x:float64): float64 = x ^ 2 - 4 * x + 3 + let x = 0.5 + let correct = 0.0 + let value = newtons(f, df, x) + check isClose(value, correct, tol=1e-5) + + test "Newtons unable to find a root": + proc bad_f(x:float64): float64 = pow(E, x) + 1 + proc bad_df(x:float64): float64 = pow(E, x) + expect(ArithmeticError): + discard newtons(bad_f, bad_df, 0, 0.000001, 1000) + + test "Secant 1 dimension func": + proc f(x:float64): float64 = (1.0 / 3.0) * x ^ 3 - 2 * x ^ 2 + 3 * x + let x = [1.0, 0.5] + let correct = 0.0 + let value = secant(f, x, 1e-5, 10) + check isClose(value, correct, tol=1e-5) + 
+ +############################### +## Multi-dimensional methods ## +############################### + +suite "Multi-dim": + proc bananaFunc(x: Tensor[float]): float = + ## Function of 2 variables with minimum at (1, 1) + ## And it looks like a banana 🍌 + result = (1 - x[0])^2 + 100*(x[1] - x[0]^2)^2 + + proc bananaBend(x: Tensor[float]): Tensor[float] = + ## Calculates the gradient of the banana function + result = newTensor[float](2) + result[0] = -2 * (1 - x[0]) + 100 * 2 * (x[1] - x[0]*x[0]) * -2 * x[0] # this one is wrong + result[1] = 100 * 2 * (x[1] - x[0]*x[0]) + + let x0 = [-1.0, -1.0].toTensor() + let correct = [1.0, 1.0].toTensor() + + doAssert checkGradient(bananaFunc, bananaBend, x0, 1e-6), "Analytic gradient is wrong in test!" + + test "Steepest Gradient": + let xSol = steepestDescent(bananaFunc, x0.clone) + for x in abs(correct - xSol): + check x < 2e-2 + + test "Steepest Gradient analytic": + let xSol = steepestDescent(bananaFunc, x0.clone, analyticGradient=bananaBend) + for x in abs(correct - xSol): + check x < 2e-2 + + test "Newton": + let xSol = newton(bananaFunc, x0.clone) + for x in abs(correct - xSol): + check x < 3e-10 + + test "Newton analytic": + let xSol = newton(bananaFunc, x0.clone, analyticGradient=bananaBend) + for x in abs(correct - xSol): + check x < 3e-10 + + test "BFGS": + let xSol = bfgs(bananaFunc, x0.clone) + for x in abs(correct - xSol): + check x < 3e-7 + + test "BFGS analytic": + let xSol = bfgs(bananaFunc, x0.clone, analyticGradient=bananaBend) + for x in abs(correct - xSol): + check x < 3e-7 + + test "L-BFGS": + let xSol = lbfgs(bananaFunc, x0.clone) + for x in abs(correct - xSol): + check x < 7e-10 + + test "L-BFGS analytic": + let xSol = lbfgs(bananaFunc, x0.clone, analyticGradient=bananaBend) + for x in abs(correct - xSol): + check x < 7e-10 + + let correctParams = [10.4, -0.45].toTensor() + proc fitFunc(params: Tensor[float], x: float): float = + params[0] * exp(params[1] * x) + + let xData = arraymancer.linspace(0.0, 10.0, 100) + randomize(1337) + let yData = correctParams[0] * exp(correctParams[1] * xData) + randomNormalTensor([100], 0.0, 1e-2) + + let params0 = [0.0, 0.0].toTensor() + + test "levmarq": + let paramsSol = levmarq(fitFunc, params0, xData, yData) + for x in abs(paramsSol - correctParams): + check x < 1.3e-3
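
For quick reference, here is a minimal sketch of how the new `numericalnim/differentiate` helpers can be called, mirroring the calls exercised in `tests/test_differentiate.nim`. The function `f` and its analytic gradient `fGrad` below are illustrative examples, not part of the patch:

```nim
import std/math
import arraymancer
import numericalnim

# Example function f(x0, x1) = sin(x0) + x0*x1, with gradient (cos(x0) + x1, x0)
proc f(x: Tensor[float]): float =
  sin(x[0]) + x[0]*x[1]

proc fGrad(x: Tensor[float]): Tensor[float] =
  result = newTensor[float](2)
  result[0] = cos(x[0]) + x[1]
  result[1] = x[0]

let x0 = [1.0, 2.0].toTensor()
echo tensorGradient(f, x0)                   # central differences (2 evaluations per derivative)
echo tensorGradient(f, x0, fastMode = true)  # forward differences (1 evaluation per derivative)
echo tensorHessian(f, x0)
echo checkGradient(f, fGrad, x0, 1e-6)       # true if analytic and numeric gradients agree
```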
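
Similarly, a minimal usage sketch of the new optimizers and `levmarq`, based on the signatures in the changelog above and on `tests/test_optimize.nim`. The Rosenbrock function, the linear model `fitFunc` and the data values are illustrative assumptions:

```nim
import std/math
import arraymancer
import numericalnim

# Rosenbrock function with minimum at (1, 1), as used in tests/test_optimize.nim.
proc rosenbrock(x: Tensor[float]): float =
  (1 - x[0])^2 + 100*(x[1] - x[0]^2)^2

let x0 = [-1.0, -1.0].toTensor()
echo bfgs(rosenbrock, x0)                                             # default options
echo lbfgs(rosenbrock, x0, options = lbfgsOptions[float](tol = 1e-8)) # custom tolerance

# Non-linear least squares: fit params[0] + params[1]*x to (noise-free) data.
proc fitFunc(params: Tensor[float], x: float): float =
  params[0] + params[1] * x

let xData = arraymancer.linspace(0.0, 10.0, 50)
let yData = 1.5 +. 3.0 * xData
let params0 = [0.0, 0.0].toTensor()
echo levmarq(fitFunc, params0, xData, yData)  # should approach [1.5, 3.0]
```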