Skip to content

Commit

Permalink
dealing with ColView() and RowView() method updates in sparse
Browse files Browse the repository at this point in the history
  • Loading branch information
james-bowman committed Sep 17, 2018
1 parent 7ef602f commit 86af1b5
Show file tree
Hide file tree
Showing 2 changed files with 36 additions and 28 deletions.
46 changes: 23 additions & 23 deletions randomprojection_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -88,7 +88,7 @@ func TestRandomProjection(t *testing.T) {

for ti, test := range tests {
matrix := sparse.Random(sparse.CSRFormat, test.rows, test.cols, test.density).(sparse.TypeConverter).ToCSR()
query := matrix.ColView(0)
query := matrix.ToCSC().ColView(0)

// When transformed using sign random projections
transformer := NewRandomProjection(test.k, float64(test.density))
Expand All @@ -97,20 +97,20 @@ func TestRandomProjection(t *testing.T) {
if err != nil {
t.Errorf("Failed to transform matrix because %v\n", err)
}
m := reducedDimMatrix.(mat.ColViewer)
m := reducedDimMatrix.(*sparse.CSR).ToCSC()

reducedDimQuery, err := transformer.Transform(query)
if err != nil {
t.Errorf("Failed to transform query because %v\n", err)
}
q := reducedDimQuery.(mat.ColViewer).ColView(0)
q := reducedDimQuery.(*sparse.CSR).ToCSC().ColView(0)

var culmDiff float64
for i := 0; i < test.cols; i++ {
angSim := pairwise.CosineSimilarity(query, matrix.ColView(i))
lshSim := pairwise.CosineSimilarity(q, m.ColView(i))
ColDo(matrix, func(j int, v mat.Vector) {
angSim := pairwise.CosineSimilarity(query, v)
lshSim := pairwise.CosineSimilarity(q, m.ColView(j))

if i == 0 {
if j == 0 {
if math.Abs(angSim-lshSim) >= 0.0000001 {
t.Errorf("Test %d: Expected matching similarity but found %.10f (Ang) and %.10f (LSH)\n", ti, angSim, lshSim)
}
Expand All @@ -119,7 +119,7 @@ func TestRandomProjection(t *testing.T) {
//diff := math.Abs(lshSim-angSim) / angSim
diff := math.Abs(lshSim - angSim)
culmDiff += diff
}
})
t.Logf("CulmDiff = %f\n", culmDiff)
avgDiff := culmDiff / float64(test.cols)

Expand Down Expand Up @@ -150,7 +150,7 @@ func TestRandomIndexing(t *testing.T) {

for ti, test := range tests {
matrix := sparse.Random(sparse.CSRFormat, test.rows, test.cols, test.density).(sparse.TypeConverter).ToCSR()
query := matrix.ColView(0)
query := matrix.ToCSC().ColView(0)

// When transformed using Random Indexing
transformer := NewRandomIndexing(test.k, float64(test.density))
Expand All @@ -159,7 +159,7 @@ func TestRandomIndexing(t *testing.T) {
if err != nil {
t.Errorf("Failed to transform matrix because %v\n", err)
}
m := reducedDimMatrix.(mat.ColViewer)
m := reducedDimMatrix.(*sparse.CSC)

reducedDimQuery, err := transformer.Transform(query)
if err != nil {
Expand All @@ -168,11 +168,11 @@ func TestRandomIndexing(t *testing.T) {
q := reducedDimQuery.(mat.ColViewer).ColView(0)

var culmDiff float64
for i := 0; i < test.cols; i++ {
angSim := pairwise.CosineSimilarity(query, matrix.ColView(i))
lshSim := pairwise.CosineSimilarity(q, m.ColView(i))
ColDo(matrix, func(j int, v mat.Vector) {
angSim := pairwise.CosineSimilarity(query, v)
lshSim := pairwise.CosineSimilarity(q, m.ColView(j))

if i == 0 {
if j == 0 {
if math.Abs(angSim-lshSim) >= 0.05 {
t.Errorf("Test %d: Expected matching similarity but found %.10f (Ang) and %.10f (LSH)\n", ti, angSim, lshSim)
}
Expand All @@ -181,7 +181,7 @@ func TestRandomIndexing(t *testing.T) {
//diff := math.Abs(lshSim-angSim) / angSim
diff := math.Abs(lshSim - angSim)
culmDiff += diff
}
})
t.Logf("CulmDiff = %f\n", culmDiff)
avgDiff := culmDiff / float64(test.cols)

Expand Down Expand Up @@ -212,7 +212,7 @@ func TestReflectiveRandomIndexing(t *testing.T) {

for ti, test := range tests {
matrix := sparse.Random(sparse.CSRFormat, test.rows, test.cols, test.density).(sparse.TypeConverter).ToCSR()
query := matrix.ColView(0)
query := matrix.ToCSC().ColView(0)

// When transformed using Reflective Random Indexing
transformer := NewReflectiveRandomIndexing(test.k, ColBasedRI, 0, float64(test.density))
Expand All @@ -230,19 +230,19 @@ func TestReflectiveRandomIndexing(t *testing.T) {
q := reducedDimQuery.(mat.ColViewer).ColView(0)

var culmDiff float64
for i := 0; i < test.cols; i++ {
origSim := pairwise.CosineSimilarity(query, matrix.ColView(i))
redSim := pairwise.CosineSimilarity(q, m.ColView(i))
ColDo(matrix, func(j int, v mat.Vector) {
origSim := pairwise.CosineSimilarity(query, v)
redSim := pairwise.CosineSimilarity(q, m.ColView(j))

if i == 0 {
if j == 0 {
if math.Abs(origSim-redSim) >= 0.0000001 {
t.Errorf("Test %d: Expected matching similarity but found %.10f (Original) and %.10f (Reduced)\n", ti, origSim, redSim)
}
}

diff := math.Abs(redSim - origSim)
culmDiff += diff
}
})
t.Logf("CulmDiff = %f\n", culmDiff)
avgDiff := culmDiff / float64(test.cols)

Expand All @@ -254,8 +254,8 @@ func TestReflectiveRandomIndexing(t *testing.T) {
if r != test.k || c != test.cols {
t.Errorf("Test %d: Expected output matrix to be %dx%d but was %dx%d\n", ti, test.k, test.cols, r, c)
}
if avgDiff >= 0.11 {
t.Errorf("Test %d: Expected difference between vector spaces %f but was %f\n", ti, 0.03, avgDiff)
if avgDiff >= 0.12 {
t.Errorf("Test %d: Expected difference between vector spaces %f but was %f\n", ti, 0.12, avgDiff)
}
}
}
18 changes: 13 additions & 5 deletions utils.go
Original file line number Diff line number Diff line change
@@ -1,8 +1,15 @@
package nlp

import "gonum.org/v1/gonum/mat"
import (
"github.com/james-bowman/sparse"
"gonum.org/v1/gonum/mat"
)

// ColDo executes fn for each column j in m
// ColDo executes fn for each column j in m. If the matrix implements the mat.ColViewer
// interface then this interface will be used to iterate over the column vectors more
// efficiently. If the matrix implements the sparse.TypeConverter interface then the
// matrix will be converted to a CSC matrix (which implements the mat.ColViewer
// interface) so that it can benefit from the same optimisation.
func ColDo(m mat.Matrix, fn func(j int, vec mat.Vector)) {
if v, isOk := m.(mat.Vector); isOk {
fn(0, v)
Expand All @@ -17,10 +24,11 @@ func ColDo(m mat.Matrix, fn func(j int, vec mat.Vector)) {
return
}

if cv, isOk := m.(mat.RawColViewer); isOk {
r, c := m.Dims()
if sv, isOk := m.(sparse.TypeConverter); isOk {
csc := sv.ToCSC()
_, c := csc.Dims()
for j := 0; j < c; j++ {
fn(j, mat.NewVecDense(r, cv.RawColView(j)))
fn(j, csc.ColView(j))
}
return
}
Expand Down

0 comments on commit 86af1b5

Please sign in to comment.