nn: support multi-categories classification (#915)
zhenghaoz authored Jan 4, 2025
1 parent 6f4235f commit 968f3ff
Showing 11 changed files with 517 additions and 64 deletions.
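
For orientation, here is a minimal sketch of how the new multi-category classification support might be used from outside the package. It assumes only the exported API visible in the diffs on this page (NewSequential, NewLinear, NewAdam, SoftmaxCrossEntropy, Rand, Zeros) and mirrors the TestIris test added in common/nn/nn_test.go; the random features and all-zero labels are placeholders, not the real iris data.

package main

import "github.com/zhenghaoz/gorse/common/nn"

func main() {
	// Placeholder inputs: x is a (150, 4) feature matrix, y is a length-150
	// tensor of integer class labels (all zero here; TestIris loads iris.data).
	x := nn.Rand(150, 4)
	y := nn.Zeros(150)

	// Three-class classifier, same architecture as TestIris.
	model := nn.NewSequential(
		nn.NewLinear(4, 100),
		nn.NewLinear(100, 100),
		nn.NewLinear(100, 3),
	)
	optimizer := nn.NewAdam(model.Parameters(), 0.01)

	for i := 0; i < 1000; i++ {
		yPred := model.Forward(x)                // logits, shape (150, 3)
		loss := nn.SoftmaxCrossEntropy(yPred, y) // mean cross-entropy over the batch

		optimizer.ZeroGrad()
		loss.Backward()
		optimizer.Step()
	}
}
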
4 changes: 2 additions & 2 deletions common/ann/ann_test.go
@@ -91,7 +91,7 @@ func (m *MNIST) openFile(path string) ([][]float32, []uint8, error) {
line := scanner.Text()
splits := strings.Split(line, " ")
// Parse label
- label, err := util.ParseUInt8(splits[0])
+ label, err := util.ParseUInt[uint8](splits[0])
if err != nil {
return nil, nil, err
}
@@ -104,7 +104,7 @@ func (m *MNIST) openFile(path string) ([][]float32, []uint8, error) {
if err != nil {
return nil, nil, err
}
- value, err := util.ParseFloat32(kv[1])
+ value, err := util.ParseFloat[float32](kv[1])
if err != nil {
return nil, nil, err
}
2 changes: 1 addition & 1 deletion common/dataset/dataset.go
@@ -67,7 +67,7 @@ func LoadIris() ([][]float32, []int, error) {
for i, row := range rows {
data[i] = make([]float32, 4)
for j, cell := range row[:4] {
- data[i][j], err = util.ParseFloat32(cell)
+ data[i][j], err = util.ParseFloat[float32](cell)
if err != nil {
return nil, nil, err
}
19 changes: 18 additions & 1 deletion common/nn/functions.go
@@ -189,10 +189,27 @@ func ReLu(x *Tensor) *Tensor {
return apply(&relu{}, x)
}

- func MSE(x, y *Tensor) *Tensor {
+ func Softmax(x *Tensor, axis int) *Tensor {
return apply(&softmax{axis: axis}, x)
}

func MeanSquareError(x, y *Tensor) *Tensor {
return Mean(Square(Sub(x, y)))
}

func SoftmaxCrossEntropy(x, y *Tensor) *Tensor {
if len(x.shape) != 2 {
panic("the shape of the first tensor must be 2-D")
}
if len(y.shape) != 1 {
panic("the shape of the second tensor must be 1-D")
}
if x.shape[0] != y.shape[0] {
panic("the size of the first tensor must be equal to the size of the second tensor")
}
return apply(&softmaxCrossEntropy{}, x, y)
}

// BCEWithLogits is equivalent to:
//
// (1 + target) * math32.Log(1+math32.Exp(-prediction)) / 2 + (1 - target) * math32.Log(1+math32.Exp(prediction)) / 2
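
A note on the new loss: based on the shape checks in SoftmaxCrossEntropy above and the softmaxCrossEntropy op added to common/nn/op.go further down, the function appears to take an (N, C) tensor of logits x and a length-N tensor of integer class labels t, and to return the mean negative log-probability of the true class:

    L(x, t) = -\frac{1}{N} \sum_{i=1}^{N} \log \frac{\exp(x_{i, t_i})}{\sum_{j=1}^{C} \exp(x_{i, j})}
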
20 changes: 17 additions & 3 deletions common/nn/layers.go
@@ -28,8 +28,8 @@ type linearLayer struct {

func NewLinear(in, out int) Layer {
return &linearLayer{
- w: RandN(in, out).RequireGrad(),
- b: RandN(out).RequireGrad(),
+ w: Rand(in, out).RequireGrad(),
+ b: Zeros(out).RequireGrad(),
}
}

@@ -62,7 +62,7 @@ type embeddingLayer struct {
func NewEmbedding(n int, shape ...int) Layer {
wShape := append([]int{n}, shape...)
return &embeddingLayer{
- w: RandN(wShape...),
+ w: Rand(wShape...),
}
}

@@ -74,6 +74,20 @@ func (e *embeddingLayer) Forward(x *Tensor) *Tensor {
return Embedding(e.w, x)
}

type sigmoidLayer struct{}

func NewSigmoid() Layer {
return &sigmoidLayer{}
}

func (s *sigmoidLayer) Parameters() []*Tensor {
return nil
}

func (s *sigmoidLayer) Forward(x *Tensor) *Tensor {
return Sigmoid(x)
}

type reluLayer struct{}

func NewReLU() Layer {
141 changes: 141 additions & 0 deletions common/nn/nn_test.go
@@ -0,0 +1,141 @@
// Copyright 2024 gorse Project Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package nn

import (
"encoding/csv"
"github.com/chewxy/math32"
"github.com/stretchr/testify/assert"
"github.com/zhenghaoz/gorse/common/dataset"
"github.com/zhenghaoz/gorse/common/util"
"os"
"path/filepath"
"testing"
)

func TestLinearRegression(t *testing.T) {
x := Rand(100, 1)
y := Add(Rand(100, 1), NewScalar(5), Mul(NewScalar(2), x))

w := Zeros(1, 1)
b := Zeros(1)
predict := func(x *Tensor) *Tensor { return Add(MatMul(x, w), b) }

lr := float32(0.1)
for i := 0; i < 100; i++ {
yPred := predict(x)
loss := MeanSquareError(y, yPred)

w.grad = nil
b.grad = nil
loss.Backward()

w.sub(w.grad.mul(NewScalar(lr)))
b.sub(b.grad.mul(NewScalar(lr)))
}

assert.Equal(t, []int{1, 1}, w.shape)
assert.InDelta(t, float64(2), w.data[0], 0.5)
assert.Equal(t, []int{1}, b.shape)
assert.InDelta(t, float64(5), b.data[0], 0.5)
}

func TestNeuralNetwork(t *testing.T) {
x := Rand(100, 1)
y := Add(Rand(100, 1), Sin(Mul(x, NewScalar(2*math32.Pi))))

model := NewSequential(
NewLinear(1, 10),
NewSigmoid(),
NewLinear(10, 1),
)
NormalInit(model.(*Sequential).layers[0].(*linearLayer).w, 0, 0.01)
NormalInit(model.(*Sequential).layers[2].(*linearLayer).w, 0, 0.01)
optimizer := NewSGD(model.Parameters(), 0.2)

var l float32
for i := 0; i < 10000; i++ {
yPred := model.Forward(x)
loss := MeanSquareError(y, yPred)

optimizer.ZeroGrad()
loss.Backward()

optimizer.Step()
l = loss.data[0]
}
assert.InDelta(t, float64(0), l, 0.1)
}

func iris() (*Tensor, *Tensor, error) {
// Download dataset
path, err := dataset.DownloadAndUnzip("iris")
if err != nil {
return nil, nil, err
}
dataFile := filepath.Join(path, "iris.data")
// Load data
f, err := os.Open(dataFile)
if err != nil {
return nil, nil, err
}
reader := csv.NewReader(f)
rows, err := reader.ReadAll()
if err != nil {
return nil, nil, err
}
// Parse data
data := make([]float32, len(rows)*4)
target := make([]float32, len(rows))
types := make(map[string]int)
for i, row := range rows {
for j, cell := range row[:4] {
data[i*4+j], err = util.ParseFloat[float32](cell)
if err != nil {
return nil, nil, err
}
}
if _, exist := types[row[4]]; !exist {
types[row[4]] = len(types)
}
target[i] = float32(types[row[4]])
}
return NewTensor(data, len(rows), 4), NewTensor(target, len(rows)), nil
}

func TestIris(t *testing.T) {
x, y, err := iris()
assert.NoError(t, err)

model := NewSequential(
NewLinear(4, 100),
NewLinear(100, 100),
NewLinear(100, 3),
)
optimizer := NewAdam(model.Parameters(), 0.01)

var l float32
for i := 0; i < 1000; i++ {
yPred := model.Forward(x)
loss := SoftmaxCrossEntropy(yPred, y)

optimizer.ZeroGrad()
loss.Backward()

optimizer.Step()
l = loss.data[0]
}
assert.InDelta(t, float32(0), l, 0.1)
}
72 changes: 72 additions & 0 deletions common/nn/op.go
@@ -715,6 +715,78 @@ func (r *relu) backward(dy *Tensor) []*Tensor {
return []*Tensor{dx}
}

type softmax struct {
base
axis int
}

func (s *softmax) String() string {
return "Softmax"
}

func (s *softmax) forward(inputs ...*Tensor) *Tensor {
x := inputs[0]
y := x.clone()
y.sub(x.max(s.axis, true))
y.exp()
y.div(y.sum(s.axis, true))
return y
}

func (s *softmax) backward(dy *Tensor) []*Tensor {
y := s.output
gx := y.clone()
gx.mul(dy)
sumdx := gx.sum(s.axis, true)
y.mul(sumdx)
gx.sub(y)
return []*Tensor{gx}
}

type softmaxCrossEntropy struct {
base
}

func (c *softmaxCrossEntropy) String() string {
return "SoftmaxCrossEntropy"
}

func (c *softmaxCrossEntropy) forward(inputs ...*Tensor) *Tensor {
x, t := inputs[0], inputs[1]
m := x.max(1, true)
s := x.clone().bSub(m) // x - m
s = s.exp() // exp(x - m)
s = s.sum(1, true) // sum(exp(x - m))
s.log() // log(sum(exp(x - m)))
m.add(s) // m + log(sum(exp(x - m)))
logP := x.clone().bSub(m) // x - (m + log(sum(exp(x - m))))
var crossEntropy float32
for i := 0; i < len(t.data); i++ {
crossEntropy -= logP.Get(i, int(t.data[i]))
}
crossEntropy /= float32(len(t.data))
return NewScalar(crossEntropy)
}

func (c *softmaxCrossEntropy) backward(dy *Tensor) []*Tensor {
x, t := c.inputs[0], c.inputs[1]
// gy *= 1/N
gy := dy.clone().mul(NewScalar(1 / float32(len(t.data))))
// y = softmax(x)
y := x.clone()
y.bSub(x.max(1, true))
y.exp()
y.bDiv(y.sum(1, true))
// convert to one-hot
oneHot := Zeros(x.shape...)
for i := 0; i < len(t.data); i++ {
oneHot.data[i*x.shape[1]+int(t.data[i])] = 1
}
// y = (y - t_onehot) * gy
y = y.sub(oneHot).mul(gy)
return []*Tensor{y, Zeros(t.shape...)}
}

type opHeap []op

func (h opHeap) Len() int {
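
A note on the softmaxCrossEntropy op above: the forward pass subtracts the per-row maximum m before exponentiating, using the log-sum-exp identity \log \sum_j \exp(x_j) = m + \log \sum_j \exp(x_j - m) to avoid overflow. The backward pass appears to return the usual softmax-minus-one-hot gradient with respect to the logits, scaled by the upstream gradient gy = dy / N; for dy = 1 this is:

    \frac{\partial L}{\partial x_{i, j}} = \frac{\mathrm{softmax}(x_i)_j - \mathbf{1}[j = t_i]}{N}
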