ianbruene · ianbruene · Nov 18, 2019 · Nov 11, 2019 · Nov 11, 2019 · Nov 11, 2019
diff --git a/difflib/bytes/bytes.go b/difflib/bytes/bytes.go
@@ -294,26 +294,43 @@ func (m *SequenceMatcher) findLongestMatch(alo, ahi, blo, bhi int) Match {
 	// find longest junk-free match
 	// during an iteration of the loop, j2len[j] = length of longest
 	// junk-free match ending with a[i-1] and b[j]
-	j2len := map[int]int{}
+	N := bhi - blo
+	j2len := make([]int, N)
+	newj2len := make([]int, N)
+	var indices []int
 	for i := alo; i != ahi; i++ {
 		// look at all instances of a[i] in b; note that because
 		// b2j has no junk keys, the loop is skipped if a[i] is junk
-		newj2len := map[int]int{}
-		for _, j := range m.b2j.get(m.a[i]) {
+		newindices := m.b2j.get(m.a[i])
+		for _, j := range newindices {
 			// a[i] matches b[j]
 			if j < blo {
 				continue
 			}
 			if j >= bhi {
 				break
 			}
-			k := j2len[j-1] + 1
-			newj2len[j] = k
+			k := 1
+			if j > blo {
+				k = j2len[j-1-blo] + 1
+			}
+			newj2len[j-blo] = k
 			if k > bestsize {
 				besti, bestj, bestsize = i-k+1, j-k+1, k
 			}
 		}
-		j2len = newj2len
+		// j2len = newj2len, clear and reuse j2len as newj2len
+		for _, j := range indices {
+			if j < blo {
+				continue
+			}
+			if j >= bhi {
+				break
+			}
+			j2len[j-blo] = 0
+		}
+		indices = newindices
+		j2len, newj2len = newj2len, j2len
 	}
 
 	// Extend the best by non-junk elements on each end.  In particular,

diff --git a/difflib/bytes/bytes_test.go b/difflib/bytes/bytes_test.go
@@ -5,9 +5,11 @@ import (
 	"fmt"
 	"math"
 	"reflect"
+	"runtime"
 	"strings"
 	"testing"
 	"sort"
+	"../tester"
 )
 
 func assertAlmostEqual(t *testing.T, a, b float64, places int) {
@@ -472,6 +474,51 @@ func BenchmarkSplitLines10000(b *testing.B) {
 	benchmarkSplitLines(b, 10000)
 }
 
+// prepareFilesToDiff asks tester.PrepareStringsToDiff for document pairs
+// generated from count and seed, and converts every document with
+// stringsToBytes. A garbage collection is forced as the function returns.
+func prepareFilesToDiff(count, seed int) (As, Bs [][][]byte) {
+	defer runtime.GC()
+	// The conversion runs inside its own function literal so the string
+	// versions of the documents are out of scope before the deferred GC.
+	func() {
+		srcA, srcB := tester.PrepareStringsToDiff(count, seed)
+		As, Bs = make([][][]byte, len(srcA)), make([][][]byte, len(srcB))
+		for idx := 0; idx < len(As); idx++ {
+			As[idx] = stringsToBytes(srcA[idx]...)
+			Bs[idx] = stringsToBytes(srcB[idx]...)
+		}
+	}()
+	return As, Bs
+}
+
+// BenchmarkDiffer runs Differ.Compare over b.N prepared document pairs,
+// comparing each pair five times and printing the length of the last result.
+// Input preparation happens before the timer is reset.
+func BenchmarkDiffer(b *testing.B) {
+	docsA, docsB := prepareFilesToDiff(b.N, 0)
+	fmt.Printf("\nDiff length:")
+	b.ResetTimer()
+	d := NewDiffer()
+	for idx := range docsA {
+		var out [][]byte
+		for rep := 5; rep > 0; rep-- {
+			out, _ = d.Compare(docsA[idx], docsB[idx])
+		}
+		fmt.Printf(" %v", len(out))
+	}
+	fmt.Printf("\n")
+}
+
+// BenchmarkMatcher builds a fresh matcher for each of b.N prepared document
+// pairs, five times per pair, and prints how many opcodes the last run
+// produced. Input preparation happens before the timer is reset.
+func BenchmarkMatcher(b *testing.B) {
+	docsA, docsB := prepareFilesToDiff(b.N, 0)
+	fmt.Printf("\nOpcodes count:")
+	b.ResetTimer()
+	for idx := range docsA {
+		var ops []OpCode
+		for rep := 5; rep > 0; rep-- {
+			ops = NewMatcher(docsA[idx], docsB[idx]).GetOpCodes()
+		}
+		fmt.Printf(" %v", len(ops))
+	}
+	fmt.Printf("\n")
+}
+
 func TestDifferCompare(t *testing.T) {
 	diff := NewDiffer()
 	// Test

diff --git a/difflib/difflib.go b/difflib/difflib.go
@@ -234,26 +234,43 @@ func (m *SequenceMatcher) findLongestMatch(alo, ahi, blo, bhi int) Match {
 	// find longest junk-free match
 	// during an iteration of the loop, j2len[j] = length of longest
 	// junk-free match ending with a[i-1] and b[j]
-	j2len := map[int]int{}
+	N := bhi - blo
+	j2len := make([]int, N)
+	newj2len := make([]int, N)
+	var indices []int
 	for i := alo; i != ahi; i++ {
 		// look at all instances of a[i] in b; note that because
 		// b2j has no junk keys, the loop is skipped if a[i] is junk
-		newj2len := map[int]int{}
-		for _, j := range m.b2j[m.a[i]] {
+		newindices := m.b2j[m.a[i]]
+		for _, j := range newindices {
 			// a[i] matches b[j]
 			if j < blo {
 				continue
 			}
 			if j >= bhi {
 				break
 			}
-			k := j2len[j-1] + 1
-			newj2len[j] = k
+			k := 1
+			if j > blo {
+				k = j2len[j-1-blo] + 1
+			}
+			newj2len[j-blo] = k
 			if k > bestsize {
 				besti, bestj, bestsize = i-k+1, j-k+1, k
 			}
 		}
-		j2len = newj2len
+		// j2len = newj2len, clear and reuse j2len as newj2len
+		for _, j := range indices {
+			if j < blo {
+				continue
+			}
+			if j >= bhi {
+				break
+			}
+			j2len[j-blo] = 0
+		}
+		indices = newindices
+		j2len, newj2len = newj2len, j2len
 	}
 
 	// Extend the best by non-junk elements on each end.  In particular,

diff --git a/difflib/difflib_test.go b/difflib/difflib_test.go
@@ -5,8 +5,10 @@ import (
 	"fmt"
 	"math"
 	"reflect"
+	"runtime"
 	"strings"
 	"testing"
+	"./tester"
 )
 
 func assertAlmostEqual(t *testing.T, a, b float64, places int) {
@@ -432,6 +434,41 @@ func BenchmarkSplitLines10000(b *testing.B) {
 	benchmarkSplitLines(b, 10000)
 }
 
+// prepareFilesToDiff returns the document pairs produced by
+// tester.PrepareStringsToDiff for count and seed, forcing a garbage
+// collection as it returns.
+func prepareFilesToDiff(count, seed int) (As, Bs [][]string) {
+	defer runtime.GC()
+	As, Bs = tester.PrepareStringsToDiff(count, seed)
+	return As, Bs
+}
+
+// BenchmarkDiffer runs Differ.Compare over b.N prepared document pairs,
+// comparing each pair five times and printing the length of the last result.
+// Input preparation happens before the timer is reset.
+func BenchmarkDiffer(b *testing.B) {
+	docsA, docsB := prepareFilesToDiff(b.N, 0)
+	fmt.Printf("\nDiff length:")
+	b.ResetTimer()
+	d := NewDiffer()
+	for idx := range docsA {
+		var out []string
+		for rep := 5; rep > 0; rep-- {
+			out, _ = d.Compare(docsA[idx], docsB[idx])
+		}
+		fmt.Printf(" %v", len(out))
+	}
+	fmt.Printf("\n")
+}
+
+// BenchmarkMatcher builds a fresh matcher for each of b.N prepared document
+// pairs, five times per pair, and prints how many opcodes the last run
+// produced. Input preparation happens before the timer is reset.
+func BenchmarkMatcher(b *testing.B) {
+	docsA, docsB := prepareFilesToDiff(b.N, 0)
+	fmt.Printf("\nOpcodes count:")
+	b.ResetTimer()
+	for idx := range docsA {
+		var ops []OpCode
+		for rep := 5; rep > 0; rep-- {
+			ops = NewMatcher(docsA[idx], docsB[idx]).GetOpCodes()
+		}
+		fmt.Printf(" %v", len(ops))
+	}
+	fmt.Printf("\n")
+}
+
 func TestDifferCompare(t *testing.T) {
 	diff := NewDiffer()
 	// Test

diff --git a/difflib/tester/tester.go b/difflib/tester/tester.go
@@ -0,0 +1,66 @@
+package tester
+
+import (
+	"math/rand"
+	"time"
+)
+
+// prepareStrings builds a pseudo-random pair of 4000-line documents, A and B,
+// that share some runs of lines, for use as input to diff benchmarks.
+//
+// All randomness comes from a generator seeded with seed, so the result is
+// reproducible for a given seed. Passing -1 seeds from the current time
+// instead.
+func prepareStrings(seed int64) (A, B []string) {
+	if seed == -1 {
+		seed = time.Now().UnixNano()
+	}
+	// A private generator keeps this function from reseeding the
+	// process-wide math/rand source as a side effect.
+	rng := rand.New(rand.NewSource(seed))
+
+	// Generate 4000 random lines, each 0 to 99 random bytes long.
+	lines := [4000]string{}
+	for i := range lines {
+		p := make([]byte, rng.Intn(100))
+		rng.Read(p) // documented to always fill p and return a nil error
+		lines[i] = string(p)
+	}
+
+	// Generate two 4000 lines documents by picking some lines at random
+	A = make([]string, 4000)
+	B = make([]string, len(A))
+	for i := range A {
+		// make the first 50 lines more likely to appear
+		if rng.Intn(100) < 40 {
+			A[i] = lines[rng.Intn(50)]
+		} else {
+			A[i] = lines[rng.Intn(len(lines))]
+		}
+		if rng.Intn(100) < 40 {
+			B[i] = lines[rng.Intn(50)]
+		} else {
+			B[i] = lines[rng.Intn(len(lines))]
+		}
+	}
+
+	// maxcopy bounds the length of each copied run. It must be at least 3:
+	// with 1 the expression Intn(maxcopy-1) below is Intn(0), which panics,
+	// and with 2 every run length comes out as 0, so the copy loops would
+	// never reach their target.
+	maxcopy := rng.Intn(len(A)-1) + 1
+	if maxcopy < 3 {
+		maxcopy = 3
+	}
+
+	// Do some copies from A to B
+	for copied, tocopy := 0, rng.Intn(2*len(A)/3); copied < tocopy; {
+		l := rng.Intn(rng.Intn(maxcopy-1) + 1)
+		for a, b, n := rng.Intn(len(A)), rng.Intn(len(B)), 0; a < len(A) && b < len(B) && n < l; a, b, n = a+1, b+1, n+1 {
+			B[b] = A[a]
+			copied++
+		}
+	}
+	// And some from B to A
+	for copied, tocopy := 0, rng.Intn(2*len(A)/3); copied < tocopy; {
+		l := rng.Intn(rng.Intn(maxcopy-1) + 1)
+		for a, b, n := rng.Intn(len(A)), rng.Intn(len(B)), 0; a < len(A) && b < len(B) && n < l; a, b, n = a+1, b+1, n+1 {
+			A[a] = B[b]
+			copied++
+		}
+	}
+	return A, B
+}
+
+// PrepareStringsToDiff returns count document pairs for diffing. Pair i is
+// produced by prepareStrings with the seed seed+i, so As[i] and Bs[i] belong
+// together.
+func PrepareStringsToDiff(count, seed int) (As, Bs [][]string) {
+	As, Bs = make([][]string, count), make([][]string, count)
+	for idx := 0; idx < len(As); idx++ {
+		As[idx], Bs[idx] = prepareStrings(int64(idx + seed))
+	}
+	return As, Bs
+}