nn: Fix BCEWithLogits (#920)
zhenghaoz authored Jan 6, 2025
1 parent 21a724c commit 5a123ca
Showing 7 changed files with 27 additions and 46 deletions.
18 changes: 1 addition & 17 deletions common/ann/hnsw.go
@@ -214,8 +214,7 @@ func (h *HNSW[T]) getNeighbourhood(e int32, currentLayer int) *heap.PriorityQueue {
	if currentLayer == 0 {
		return h.bottomNeighbors[e]
	} else {
-		temp, _ := h.upperNeighbors[currentLayer-1][e]
-		return temp
+		return h.upperNeighbors[currentLayer-1][e]
	}
}
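As an aside, the one-line replacement above leans on a standard Go property: reading from a map with the comma-ok form and discarding the boolean yields exactly the same value as indexing directly (the zero value when the key is absent), so the temporary was unnecessary. A minimal standalone sketch with a hypothetical map, not code from the repository:

package main

import "fmt"

func main() {
	neighbors := map[int32]int{42: 7} // hypothetical data for illustration
	v1, _ := neighbors[42]            // comma-ok form, boolean discarded
	v2 := neighbors[42]               // direct indexing, identical result
	fmt.Println(v1 == v2)             // prints: true
}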

@@ -235,21 +234,6 @@ func (h *HNSW[T]) distance(q []T, points []int32) *heap.PriorityQueue {
	return pq
}

-func (h *HNSW[T]) efSearch(q []T, ef int) *heap.PriorityQueue {
-	var (
-		w           *heap.PriorityQueue                    // set for the current the nearest element
-		enterPoints = h.distance(q, []int32{h.enterPoint}) // get enter point for hnsw
-		topLayer    = len(h.upperNeighbors)                // top layer for hnsw
-	)
-	for currentLayer := topLayer; currentLayer > 0; currentLayer-- {
-		w = h.searchLayer(q, enterPoints, 1, currentLayer)
-		enterPoints = heap.NewPriorityQueue(false)
-		enterPoints.Push(w.Peek())
-	}
-	w = h.searchLayer(q, enterPoints, ef, 0)
-	return w
-}
-
// efSearchValue returns the efSearch value to use, given the current number of elements desired.
func (h *HNSW[T]) efSearchValue(n int) int {
	if h.ef > 0 {
20 changes: 12 additions & 8 deletions common/nn/functions.go
@@ -14,6 +14,10 @@

package nn

+import (
+	"fmt"
+)
+
func Neg(x *Tensor) *Tensor {
	return apply(&neg{}, x)
}
@@ -27,7 +31,7 @@ func Add(x0 *Tensor, x ...*Tensor) *Tensor {
		}
		for i := 0; i < len(x1.shape); i++ {
			if x0.shape[len(x0.shape)-len(x1.shape)+i] != x1.shape[i] {
-				panic("the shape of the second tensor must be a suffix sequence of the shape of the first tensor")
+				panic(fmt.Sprintf("the shape of one tensor %v must be a suffix sequence of the shape of the other tensor %v", x0.shape, x1.shape))
			}
		}
		output = apply(&add{}, output, x1)
@@ -38,7 +42,7 @@ func Add(x0 *Tensor, x ...*Tensor) *Tensor {
// Sub returns the element-wise difference of two tensors. The shape of the second tensor must be a suffix sequence of the shape of the first tensor.
func Sub(x0, x1 *Tensor) *Tensor {
	if len(x0.shape) < len(x1.shape) {
-		x0, x1 = x1, x0
+		panic(fmt.Sprintf("the shape of the second tensor %v must be a suffix sequence of the shape of the first tensor %v", x1.shape, x0.shape))
	}
	for i := 0; i < len(x1.shape); i++ {
		if x0.shape[len(x0.shape)-len(x1.shape)+i] != x1.shape[i] {
@@ -55,7 +59,7 @@ func Mul(x0, x1 *Tensor) *Tensor {
	}
	for i := 0; i < len(x1.shape); i++ {
		if x0.shape[len(x0.shape)-len(x1.shape)+i] != x1.shape[i] {
-			panic("the shape of the second tensor must be a suffix sequence of the shape of the first tensor")
+			panic(fmt.Sprintf("the shape of the second tensor %v must be a suffix sequence of the shape of the first tensor %v", x1.shape, x0.shape))
		}
	}
	return apply(&mul{}, x0, x1)
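For readers unfamiliar with the rule these panics enforce: the trailing dimensions of the larger tensor must match the smaller tensor exactly, so the smaller one can be broadcast across the leading dimensions. A minimal sketch inside the nn package, using only functions shown in this diff (the helper name is hypothetical and not part of this commit):

// exampleSuffixShapes is illustrative only and not part of this commit.
func exampleSuffixShapes() {
	a := Ones(2, 3) // shape [2, 3]
	b := Ones(3)    // shape [3] is a suffix of [2, 3], so b broadcasts over each row
	_ = Mul(a, b)   // fine

	// c := Ones(2)  // shape [2] is NOT a suffix of [2, 3]
	// _ = Mul(a, c) // would panic with the message shown above
}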
@@ -64,7 +68,7 @@ func Mul(x0, x1 *Tensor) *Tensor {
// Div returns the element-wise division of two tensors. The shape of the second tensor must be a suffix sequence of the shape of the first tensor.
func Div(x0, x1 *Tensor) *Tensor {
	if len(x0.shape) < len(x1.shape) {
-		x0, x1 = x1, x0
+		panic(fmt.Sprintf("the shape of the second tensor %v must be a suffix sequence of the shape of the first tensor %v", x1.shape, x0.shape))
	}
	for i := 0; i < len(x1.shape); i++ {
		if x0.shape[len(x0.shape)-len(x1.shape)+i] != x1.shape[i] {
@@ -81,11 +85,11 @@ func Square(x *Tensor) *Tensor {

// Pow returns the element-wise power of a tensor. The shape of the second tensor must be a suffix sequence of the shape of the first tensor.
func Pow(x *Tensor, n *Tensor) *Tensor {
-	if len(x.shape) < len(x.shape) {
+	if len(x.shape) < len(n.shape) {
		panic("the shape of the second tensor must be a suffix sequence of the shape of the first tensor")
	}
-	for i := 0; i < len(x.shape); i++ {
-		if x.shape[len(x.shape)-len(x.shape)+i] != x.shape[i] {
+	for i := 0; i < len(n.shape); i++ {
+		if n.shape[len(n.shape)-len(x.shape)+i] != x.shape[i] {
			panic("the shape of the second tensor must be a suffix sequence of the shape of the first tensor")
		}
	}
@@ -222,7 +226,7 @@ func BCEWithLogits(target, prediction *Tensor) *Tensor {
			NewScalar(2)),
		Div(
			Mul(
-				Sub(NewScalar(1), target),
+				Sub(Ones(target.shape...), target),
				Log(Add(NewScalar(1), Exp(prediction)))),
			NewScalar(2))))
}
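To spell out the fix named in the commit title: before this change, Sub silently swapped its operands whenever the first tensor had fewer dimensions (see the Sub hunk above), so Sub(NewScalar(1), target) effectively evaluated target - 1 and flipped the sign of this loss term. Building a ones tensor with target's shape keeps both operands the same shape and the subtraction in its intended order. A minimal sketch inside the nn package; the function name and the example values are hypothetical, not taken from the repository:

// exampleOnesMinusTarget is illustrative only and not part of this commit.
func exampleOnesMinusTarget() *Tensor {
	target := NewTensor([]float32{1, -1, 1, -1}, 4)
	// Old behaviour: Sub(NewScalar(1), target) swapped operands internally and
	// returned target - 1, i.e. {0, -2, 0, -2}.
	// Fixed expression: both operands share shape [4], so the result is the
	// intended 1 - target.
	return Sub(Ones(4), target) // {0, 2, 0, 2}
}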
2 changes: 1 addition & 1 deletion common/nn/layers.go
@@ -166,7 +166,7 @@ func Save[T Model](o T, path string) error {
		newKey := make([]string, len(key))
		copy(newKey, key)
		newKey = append(newKey, field.Name)
-		if err = save(reflect.ValueOf(o).Field(i).Interface(), append(key, field.Name)); err != nil {
+		if err = save(reflect.ValueOf(o).Field(i).Interface(), newKey); err != nil {
			return err
		}
	}
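For context on why the one-line change above matters: append on a shared slice may reuse its backing array, so passing append(key, field.Name) from a loop can let a later iteration overwrite the suffix seen by an earlier one, whereas newKey is copied first and therefore independent. A standalone sketch of the pitfall with hypothetical values, not code from the repository:

package main

import "fmt"

func main() {
	key := make([]string, 1, 4)
	key[0] = "model"

	a := append(key, "fieldA") // reuses key's backing array (len 1, cap 4)
	b := append(key, "fieldB") // writes into the same slot as a[1]
	fmt.Println(a[1], b[1])    // prints: fieldB fieldB

	// The pattern used by Save above: copy first, then append.
	newKey := make([]string, len(key))
	copy(newKey, key)
	newKey = append(newKey, "fieldA")
	fmt.Println(newKey[1]) // prints: fieldA, unaffected by later appends to key
}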
4 changes: 2 additions & 2 deletions common/nn/nn_test.go
@@ -260,11 +260,11 @@ func TestMNIST(t *testing.T) {
		optimizer.Step()
		sumLoss += loss.data[0]
		sumAcc += accuracy(yPred, yBatch)
-		bar.Add(batchSize)
+		assert.NoError(t, bar.Add(batchSize))
	}
	sumLoss /= float32(train.A.shape[0] / batchSize)
	sumAcc /= float32(train.A.shape[0] / batchSize)
-	bar.Finish()
+	assert.NoError(t, bar.Finish())
	fmt.Println("Duration:", time.Since(startTime), "Loss:", sumLoss, "Accuracy:", sumAcc)
}

4 changes: 2 additions & 2 deletions common/nn/tensor.go
@@ -651,9 +651,9 @@ func (t *Tensor) transpose() *Tensor {
panic("transpose requires at least 2-D tensor")
}
shape := make([]int, 0, len(t.shape))
batchSize := 0
batchSize := 1
for i := 0; i < len(t.shape)-2; i++ {
batchSize += t.shape[i]
batchSize *= t.shape[i]
shape = append(shape, t.shape[i])
}
m, n := t.shape[len(t.shape)-2], t.shape[len(t.shape)-1]
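The corrected accumulator reflects that a tensor of shape [d1, ..., dk, m, n] holds d1*...*dk matrices of size m x n, so the count must start at 1 and multiply the leading dimensions rather than sum them. A small sketch of the arithmetic; the helper and its shape are hypothetical, not code from the repository:

// exampleBatchedTransposeCount is illustrative only and not part of this commit.
func exampleBatchedTransposeCount() int {
	shape := []int{4, 5, 2, 3} // 4*5 = 20 matrices, each 2x3
	batchSize := 1
	for i := 0; i < len(shape)-2; i++ {
		batchSize *= shape[i]
	}
	return batchSize // 20; the old code (start at 0, +=) would have returned 9
}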
7 changes: 7 additions & 0 deletions common/nn/tensor_test.go
@@ -76,6 +76,13 @@ func TestTensor_Sum(t *testing.T) {
	assert.Equal(t, []float32{4, 6, 12, 14, 20, 22}, y.data)
}

+func TestTensor_Transpose(t *testing.T) {
+	x := NewTensor([]float32{1, 2, 3, 4, 5, 6}, 3, 2)
+	y := x.transpose()
+	assert.Equal(t, []int{2, 3}, y.Shape())
+	assert.Equal(t, []float32{1, 3, 5, 2, 4, 6}, y.Data())
+}

func (t *Tensor) matMulLegacy(other *Tensor, transpose1, transpose2 bool) *Tensor {
	if !transpose1 && !transpose2 {
		if len(t.shape) != 2 || len(other.shape) != 2 {
18 changes: 2 additions & 16 deletions model/click/deepfm_v2.go
@@ -63,20 +63,6 @@ type DeepFMV2 struct {
	embeddingV nn.Layer
	linear     []nn.Layer

-	// Adam optimizer variables
-	m_v  [][]float32
-	m_w  []float32
-	m_w0 [][]float32
-	v_v  [][]float32
-	v_w  []float32
-	v_w0 [][]float32
-	t    int
-
-	// preallocated arrays
-	dataV  []float32
-	dataW  []float32
-	dataW0 []float32
-
	// Hyper parameters
	batchSize int
	nFactors  int
@@ -245,7 +231,7 @@ func (fm *DeepFMV2) Init(trainSet *Dataset) {
		_, x, _ := trainSet.Get(i)
		fm.numDimension = mathutil.MaxVal(fm.numDimension, len(x))
	}
-	fm.bias = nn.Rand()
+	fm.bias = nn.Zeros()
	fm.embeddingW = nn.NewEmbedding(fm.numFeatures, 1)
	fm.embeddingV = nn.NewEmbedding(fm.numFeatures, fm.nFactors)
	fm.linear = []nn.Layer{nn.NewLinear(fm.numDimension*fm.nFactors, fm.hiddenLayers[0])}
@@ -310,7 +296,7 @@ func (fm *DeepFMV2) Forward(indices, values *nn.Tensor) *nn.Tensor {
	sum = nn.Mul(sum, nn.NewScalar(0.5))
	w := fm.embeddingW.Forward(indices)
	linear := nn.BMM(w, x, true)
-	fmOutput := nn.Add(linear, fm.bias)
+	fmOutput := nn.Add(nn.Reshape(linear, fm.batchSize), nn.Reshape(sum, fm.batchSize), fm.bias)
	fmOutput = nn.Flatten(fmOutput)

	// deep network
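For context, the corrected line assembles the usual factorization-machine output before the deep component: the bias fm.bias, the linear term carried by linear, and the halved pairwise-interaction term carried by sum (hence the 0.5 factor two lines up), all reshaped to the batch dimension so they align; the previous line dropped the interaction term entirely. As a reminder in standard FM notation (added here for reference, not taken from this file):

\hat{y}_{\mathrm{FM}}(x) = b + \sum_{i} w_i x_i + \frac{1}{2} \sum_{f=1}^{k} \Big[ \Big( \sum_{i} v_{i,f}\, x_i \Big)^{2} - \sum_{i} v_{i,f}^{2}\, x_i^{2} \Big]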
