stats: do not split excluded lower value ranges (pingcap#12009)

matthewli968 · Sep 11, 2019 · 440bb74 · 440bb74
1 parent 45fcba1
commit 440bb74
Show file tree

Hide file tree

Showing 6 changed files with 97 additions and 89 deletions.
diff --git a/statistics/feedback.go b/statistics/feedback.go
@@ -313,15 +313,21 @@ func buildBucketFeedback(h *Histogram, feedback *QueryFeedback) (map[int]*Bucket
 		if skip {
 			continue
 		}
-		idx, _ := h.Bounds.LowerBound(0, fb.Lower)
+		idx := h.Bounds.UpperBound(0, fb.Lower)
 		bktIdx := 0
 		// The last bucket also stores the feedback that falls outside the upper bound.
-		if idx >= h.Bounds.NumRows()-2 {
+		if idx >= h.Bounds.NumRows()-1 {
 			bktIdx = h.Len() - 1
+		} else if h.Len() == 1 {
+			bktIdx = 0
 		} else {
-			bktIdx = idx / 2
+			if idx == 0 {
+				bktIdx = 0
+			} else {
+				bktIdx = (idx - 1) / 2
+			}
 			// Make sure that this feedback lies within the bucket.
-			if chunk.Compare(h.Bounds.GetRow(2*bktIdx+1), 0, fb.Upper) < 0 {
+			if chunk.Compare(h.Bounds.GetRow(2*(bktIdx+1)), 0, fb.Upper) < 0 {
 				continue
 			}
 		}

diff --git a/statistics/feedback_test.go b/statistics/feedback_test.go
@@ -72,14 +72,13 @@ func (s *testFeedbackSuite) TestUpdateHistogram(c *C) {
 	defaultBucketCount = 7
 	defer func() { defaultBucketCount = originBucketCount }()
 	c.Assert(UpdateHistogram(q.Hist, q).ToString(0), Equals,
-		"column:0 ndv:10058 totColSize:0\n"+
-			"num: 10000 lower_bound: 0 upper_bound: 1 repeats: 0\n"+
-			"num: 9 lower_bound: 2 upper_bound: 7 repeats: 0\n"+
-			"num: 11 lower_bound: 8 upper_bound: 19 repeats: 0\n"+
-			"num: 0 lower_bound: 20 upper_bound: 20 repeats: 0\n"+
-			"num: 18 lower_bound: 21 upper_bound: 39 repeats: 0\n"+
-			"num: 18 lower_bound: 40 upper_bound: 58 repeats: 0\n"+
-			"num: 2 lower_bound: 59 upper_bound: 60 repeats: 0")
+		"column:0 ndv:10053 totColSize:0\n"+
+			"num: 10001 lower_bound: 0 upper_bound: 2 repeats: 0\n"+
+			"num: 7 lower_bound: 2 upper_bound: 5 repeats: 0\n"+
+			"num: 4 lower_bound: 5 upper_bound: 7 repeats: 0\n"+
+			"num: 11 lower_bound: 10 upper_bound: 20 repeats: 0\n"+
+			"num: 19 lower_bound: 30 upper_bound: 49 repeats: 0\n"+
+			"num: 11 lower_bound: 50 upper_bound: 60 repeats: 0")
 }
 
 func (s *testFeedbackSuite) TestSplitBuckets(c *C) {

diff --git a/statistics/handle/update.go b/statistics/handle/update.go
@@ -766,11 +766,11 @@ func formatBuckets(hg *statistics.Histogram, lowBkt, highBkt, idxCols int) strin
 		return hg.BucketToString(lowBkt, idxCols)
 	}
 	if lowBkt+1 == highBkt {
-		return fmt.Sprintf("%s, %s", hg.BucketToString(lowBkt, 0), hg.BucketToString(highBkt, 0))
+		return fmt.Sprintf("%s, %s", hg.BucketToString(lowBkt, idxCols), hg.BucketToString(highBkt, idxCols))
 	}
 	// do not care the middle buckets
-	return fmt.Sprintf("%s, (%d buckets, total count %d), %s", hg.BucketToString(lowBkt, 0),
-		highBkt-lowBkt-1, hg.Buckets[highBkt-1].Count-hg.Buckets[lowBkt].Count, hg.BucketToString(highBkt, 0))
+	return fmt.Sprintf("%s, (%d buckets, total count %d), %s", hg.BucketToString(lowBkt, idxCols),
+		highBkt-lowBkt-1, hg.Buckets[highBkt-1].Count-hg.Buckets[lowBkt].Count, hg.BucketToString(highBkt, idxCols))
 }
 
 func colRangeToStr(c *statistics.Column, ran *ranger.Range, actual int64, factor float64) string {

diff --git a/statistics/handle/update_test.go b/statistics/handle/update_test.go
@@ -684,18 +684,18 @@ func (s *testStatsSuite) TestSplitRange(c *C) {
 		{
 			points:  []int64{0, 1, 3, 8, 8, 20},
 			exclude: []bool{true, false, true, false, true, false},
-			result:  "(0,1],(3,5],(5,7],(7,8],(8,20]",
+			result:  "(0,1],(3,7),[7,8),[8,8],(8,10),[10,20]",
 		},
 		{
 			points:  []int64{8, 10, 20, 30},
 			exclude: []bool{false, false, true, true},
-			result:  "[8,8],(8,10],(20,30)",
+			result:  "[8,10),[10,10],(20,30)",
 		},
 		{
 			// test remove invalid range
 			points:  []int64{8, 9},
 			exclude: []bool{false, true},
-			result:  "[8,8]",
+			result:  "[8,9)",
 		},
 	}
 	for _, t := range tests {
@@ -743,25 +743,25 @@ func (s *testStatsSuite) TestQueryFeedback(c *C) {
 			// test primary key feedback
 			sql: "select * from t where t.a <= 5 order by a desc",
 			hist: "column:1 ndv:4 totColSize:0\n" +
-				"num: 1 lower_bound: -9223372036854775808 upper_bound: 1 repeats: 0\n" +
-				"num: 1 lower_bound: 2 upper_bound: 2 repeats: 1\n" +
-				"num: 2 lower_bound: 3 upper_bound: 5 repeats: 0",
+				"num: 1 lower_bound: -9223372036854775808 upper_bound: 2 repeats: 0\n" +
+				"num: 2 lower_bound: 2 upper_bound: 4 repeats: 0\n" +
+				"num: 1 lower_bound: 4 upper_bound: 4 repeats: 1",
 			idxCols: 0,
 		},
 		{
 			// test index feedback by double read
 			sql: "select * from t use index(idx) where t.b <= 5",
 			hist: "index:1 ndv:2\n" +
-				"num: 2 lower_bound: -inf upper_bound: 2 repeats: 0\n" +
-				"num: 2 lower_bound: 3 upper_bound: 6 repeats: 0",
+				"num: 3 lower_bound: -inf upper_bound: 5 repeats: 0\n" +
+				"num: 1 lower_bound: 5 upper_bound: 5 repeats: 1",
 			idxCols: 1,
 		},
 		{
 			// test index feedback by single read
 			sql: "select b from t use index(idx) where t.b <= 5",
 			hist: "index:1 ndv:2\n" +
-				"num: 2 lower_bound: -inf upper_bound: 2 repeats: 0\n" +
-				"num: 2 lower_bound: 3 upper_bound: 6 repeats: 0",
+				"num: 3 lower_bound: -inf upper_bound: 5 repeats: 0\n" +
+				"num: 1 lower_bound: 5 upper_bound: 5 repeats: 1",
 			idxCols: 1,
 		},
 	}
@@ -855,7 +855,7 @@ func (s *testStatsSuite) TestQueryFeedbackForPartition(c *C) {
 			// test primary key feedback
 			sql: "select * from t where t.a <= 5",
 			hist: "column:1 ndv:2 totColSize:0\n" +
-				"num: 1 lower_bound: -9223372036854775808 upper_bound: 1 repeats: 0\n" +
+				"num: 1 lower_bound: -9223372036854775808 upper_bound: 2 repeats: 0\n" +
 				"num: 1 lower_bound: 2 upper_bound: 5 repeats: 0",
 			idxCols: 0,
 		},
@@ -987,17 +987,17 @@ func (s *testStatsSuite) TestUpdateStatsByLocalFeedback(c *C) {
 
 	c.Assert(tbl.Columns[tblInfo.Columns[0].ID].ToString(0), Equals, "column:1 ndv:3 totColSize:0\n"+
 		"num: 1 lower_bound: 1 upper_bound: 1 repeats: 1\n"+
-		"num: 1 lower_bound: 2 upper_bound: 2 repeats: 1\n"+
-		"num: 2 lower_bound: 3 upper_bound: 9223372036854775807 repeats: 0")
+		"num: 2 lower_bound: 2 upper_bound: 4 repeats: 0\n"+
+		"num: 1 lower_bound: 4 upper_bound: 9223372036854775807 repeats: 0")
 	sc := &stmtctx.StatementContext{TimeZone: time.Local}
 	low, err := codec.EncodeKey(sc, nil, types.NewIntDatum(5))
 	c.Assert(err, IsNil)
 
 	c.Assert(tbl.Indices[tblInfo.Indices[0].ID].CMSketch.QueryBytes(low), Equals, uint64(2))
 
 	c.Assert(tbl.Indices[tblInfo.Indices[0].ID].ToString(1), Equals, "index:1 ndv:2\n"+
-		"num: 2 lower_bound: -inf upper_bound: 2 repeats: 0\n"+
-		"num: 2 lower_bound: 3 upper_bound: 6 repeats: 0")
+		"num: 2 lower_bound: -inf upper_bound: 5 repeats: 0\n"+
+		"num: 1 lower_bound: 5 upper_bound: 5 repeats: 1")
 
 	// Test that it won't cause panic after update.
 	testKit.MustQuery("select * from t use index(idx) where b > 0")
@@ -1038,8 +1038,8 @@ func (s *testStatsSuite) TestUpdatePartitionStatsByLocalFeedback(c *C) {
 
 	c.Assert(tbl.Columns[tblInfo.Columns[0].ID].ToString(0), Equals, "column:1 ndv:3 totColSize:0\n"+
 		"num: 1 lower_bound: 1 upper_bound: 1 repeats: 1\n"+
-		"num: 1 lower_bound: 2 upper_bound: 2 repeats: 1\n"+
-		"num: 2 lower_bound: 3 upper_bound: 9223372036854775807 repeats: 0")
+		"num: 2 lower_bound: 2 upper_bound: 4 repeats: 0\n"+
+		"num: 1 lower_bound: 4 upper_bound: 9223372036854775807 repeats: 0")
 }
 
 type logHook struct {
@@ -1112,13 +1112,13 @@ func (s *testStatsSuite) TestLogDetailedInfo(c *C) {
 	}{
 		{
 			sql: "select * from t where t.a <= 15",
-			result: "[stats-feedback] test.t, column=a, rangeStr=range: [-inf,7), actual: 8, expected: 7, buckets: {num: 8 lower_bound: 0 upper_bound: 7 repeats: 1}" +
+			result: "[stats-feedback] test.t, column=a, rangeStr=range: [-inf,8), actual: 8, expected: 8, buckets: {num: 8 lower_bound: 0 upper_bound: 7 repeats: 1, num: 8 lower_bound: 8 upper_bound: 15 repeats: 1}" +
 				"[stats-feedback] test.t, column=a, rangeStr=range: [8,15), actual: 8, expected: 7, buckets: {num: 8 lower_bound: 8 upper_bound: 15 repeats: 1}",
 		},
 		{
 			sql: "select * from t use index(idx) where t.b <= 15",
-			result: "[stats-feedback] test.t, index=idx, rangeStr=range: [-inf,7), actual: 8, expected: 7, histogram: {num: 8 lower_bound: 0 upper_bound: 7 repeats: 1}" +
-				"[stats-feedback] test.t, index=idx, rangeStr=range: [8,15), actual: 8, expected: 7, histogram: {num: 8 lower_bound: 8 upper_bound: 15 repeats: 1}",
+			result: "[stats-feedback] test.t, index=idx, rangeStr=range: [-inf,8), actual: 8, expected: 8, histogram: {num: 8 lower_bound: 0 upper_bound: 7 repeats: 1, num: 8 lower_bound: 8 upper_bound: 15 repeats: 1}" +
+				"[stats-feedback] test.t, index=idx, rangeStr=range: [8,16), actual: 8, expected: 8, histogram: {num: 8 lower_bound: 8 upper_bound: 15 repeats: 1, num: 4 lower_bound: 16 upper_bound: 19 repeats: 1}",
 		},
 		{
 			sql:    "select b from t use index(idx_ba) where b = 1 and a <= 5",
@@ -1466,9 +1466,9 @@ func (s *testStatsSuite) TestAbnormalIndexFeedback(c *C) {
 			// The real count of `a = 1` is 0.
 			sql: "select * from t where a = 1 and b < 21",
 			hist: "column:2 ndv:20 totColSize:20\n" +
-				"num: 4 lower_bound: -9223372036854775808 upper_bound: 6 repeats: 0\n" +
-				"num: 3 lower_bound: 7 upper_bound: 13 repeats: 0\n" +
-				"num: 6 lower_bound: 14 upper_bound: 19 repeats: 1",
+				"num: 5 lower_bound: -9223372036854775808 upper_bound: 7 repeats: 0\n" +
+				"num: 4 lower_bound: 7 upper_bound: 14 repeats: 0\n" +
+				"num: 4 lower_bound: 14 upper_bound: 21 repeats: 0",
 			rangeID: tblInfo.Columns[1].ID,
 			idxID:   tblInfo.Indices[0].ID,
 			eqCount: 3,
@@ -1477,9 +1477,9 @@ func (s *testStatsSuite) TestAbnormalIndexFeedback(c *C) {
 			// The real count of `b > 10` is 0.
 			sql: "select * from t where a = 2 and b > 10",
 			hist: "column:2 ndv:20 totColSize:20\n" +
-				"num: 4 lower_bound: -9223372036854775808 upper_bound: 6 repeats: 0\n" +
-				"num: 2 lower_bound: 7 upper_bound: 13 repeats: 0\n" +
-				"num: 6 lower_bound: 14 upper_bound: 19 repeats: 1",
+				"num: 5 lower_bound: -9223372036854775808 upper_bound: 7 repeats: 0\n" +
+				"num: 6 lower_bound: 7 upper_bound: 14 repeats: 0\n" +
+				"num: 7 lower_bound: 14 upper_bound: 9223372036854775807 repeats: 0",
 			rangeID: tblInfo.Columns[1].ID,
 			idxID:   tblInfo.Indices[0].ID,
 			eqCount: 3,
@@ -1531,25 +1531,25 @@ func (s *testStatsSuite) TestFeedbackRanges(c *C) {
 		{
 			sql: "select * from t where a <= 50 or (a > 130 and a < 140)",
 			hist: "column:1 ndv:30 totColSize:0\n" +
-				"num: 8 lower_bound: -128 upper_bound: 7 repeats: 0\n" +
-				"num: 8 lower_bound: 8 upper_bound: 15 repeats: 0\n" +
+				"num: 8 lower_bound: -128 upper_bound: 8 repeats: 0\n" +
+				"num: 8 lower_bound: 8 upper_bound: 16 repeats: 0\n" +
 				"num: 14 lower_bound: 16 upper_bound: 50 repeats: 0",
 			colID: 1,
 		},
 		{
 			sql: "select * from t where a >= 10",
 			hist: "column:1 ndv:30 totColSize:0\n" +
-				"num: 8 lower_bound: -128 upper_bound: 7 repeats: 0\n" +
-				"num: 8 lower_bound: 8 upper_bound: 15 repeats: 0\n" +
+				"num: 8 lower_bound: -128 upper_bound: 8 repeats: 0\n" +
+				"num: 8 lower_bound: 8 upper_bound: 16 repeats: 0\n" +
 				"num: 14 lower_bound: 16 upper_bound: 127 repeats: 0",
 			colID: 1,
 		},
 		{
 			sql: "select * from t use index(idx) where a = 1 and (b <= 50 or (b > 130 and b < 140))",
 			hist: "column:2 ndv:20 totColSize:30\n" +
-				"num: 7 lower_bound: -128 upper_bound: 6 repeats: 0\n" +
-				"num: 7 lower_bound: 7 upper_bound: 13 repeats: 1\n" +
-				"num: 6 lower_bound: 14 upper_bound: 19 repeats: 1",
+				"num: 8 lower_bound: -128 upper_bound: 7 repeats: 0\n" +
+				"num: 8 lower_bound: 7 upper_bound: 14 repeats: 0\n" +
+				"num: 7 lower_bound: 14 upper_bound: 51 repeats: 0",
 			colID: 2,
 		},
 	}
@@ -1604,32 +1604,32 @@ func (s *testStatsSuite) TestUnsignedFeedbackRanges(c *C) {
 		{
 			sql: "select * from t where a <= 50",
 			hist: "column:1 ndv:30 totColSize:10\n" +
-				"num: 8 lower_bound: 0 upper_bound: 7 repeats: 0\n" +
-				"num: 8 lower_bound: 8 upper_bound: 15 repeats: 0\n" +
+				"num: 8 lower_bound: 0 upper_bound: 8 repeats: 0\n" +
+				"num: 8 lower_bound: 8 upper_bound: 16 repeats: 0\n" +
 				"num: 14 lower_bound: 16 upper_bound: 50 repeats: 0",
 			tblName: "t",
 		},
 		{
 			sql: "select count(*) from t",
 			hist: "column:1 ndv:30 totColSize:10\n" +
-				"num: 8 lower_bound: 0 upper_bound: 7 repeats: 0\n" +
-				"num: 8 lower_bound: 8 upper_bound: 15 repeats: 0\n" +
+				"num: 8 lower_bound: 0 upper_bound: 8 repeats: 0\n" +
+				"num: 8 lower_bound: 8 upper_bound: 16 repeats: 0\n" +
 				"num: 14 lower_bound: 16 upper_bound: 255 repeats: 0",
 			tblName: "t",
 		},
 		{
 			sql: "select * from t1 where a <= 50",
 			hist: "column:1 ndv:30 totColSize:10\n" +
-				"num: 8 lower_bound: 0 upper_bound: 7 repeats: 0\n" +
-				"num: 8 lower_bound: 8 upper_bound: 15 repeats: 0\n" +
+				"num: 8 lower_bound: 0 upper_bound: 8 repeats: 0\n" +
+				"num: 8 lower_bound: 8 upper_bound: 16 repeats: 0\n" +
 				"num: 14 lower_bound: 16 upper_bound: 50 repeats: 0",
 			tblName: "t1",
 		},
 		{
 			sql: "select count(*) from t1",
 			hist: "column:1 ndv:30 totColSize:10\n" +
-				"num: 8 lower_bound: 0 upper_bound: 7 repeats: 0\n" +
-				"num: 8 lower_bound: 8 upper_bound: 15 repeats: 0\n" +
+				"num: 8 lower_bound: 0 upper_bound: 8 repeats: 0\n" +
+				"num: 8 lower_bound: 8 upper_bound: 16 repeats: 0\n" +
 				"num: 14 lower_bound: 16 upper_bound: 18446744073709551615 repeats: 0",
 			tblName: "t1",
 		},

diff --git a/statistics/histogram.go b/statistics/histogram.go
@@ -432,41 +432,43 @@ func (hg *Histogram) typeMatch(ranges []*ranger.Range) bool {
 	return true
 }
 
-// SplitRange splits the range according to the histogram upper bound. Note that we treat last bucket's upper bound
-// as inf, so all the split Ranges will totally fall in one of the (-inf, u(0)], (u(0), u(1)],...(u(n-3), u(n-2)],
-// (u(n-2), +inf), where n is the number of buckets, u(i) is the i-th bucket's upper bound.
+// SplitRange splits the ranges according to the histogram's lower bounds. Note that we treat the first bucket's lower bound
+// as -inf and the last bucket's upper bound as +inf, so every split range falls entirely within one of (-inf, l(1)),
+// [l(1), l(2)),...[l(n-2), l(n-1)), [l(n-1), +inf), where n is the number of buckets and l(i) is the i-th bucket's lower bound.
 func (hg *Histogram) SplitRange(sc *stmtctx.StatementContext, oldRanges []*ranger.Range, encoded bool) ([]*ranger.Range, bool) {
 	if !hg.typeMatch(oldRanges) {
 		return oldRanges, false
 	}
+	// Treat the only bucket as (-inf, +inf), so we do not need to split it.
+	if hg.Len() == 1 {
+		return oldRanges, true
+	}
 	ranges := make([]*ranger.Range, 0, len(oldRanges))
 	for _, ran := range oldRanges {
 		ranges = append(ranges, ran.Clone())
 	}
 	split := make([]*ranger.Range, 0, len(ranges))
 	for len(ranges) > 0 {
-		// Find the last bound that greater or equal to the LowVal.
+		// Find the first bound that is greater than the LowVal.
 		idx := hg.Bounds.UpperBound(0, &ranges[0].LowVal[0])
-		if !ranges[0].LowExclude && idx > 0 {
-			cmp := chunk.Compare(hg.Bounds.GetRow(idx-1), 0, &ranges[0].LowVal[0])
-			if cmp == 0 {
-				idx--
-			}
-		}
-		// Treat last bucket's upper bound as inf, so we do not need split any more.
-		if idx >= hg.Bounds.NumRows()-2 {
+		// Treat the last bucket's upper bound as +inf, so we do not need to split any more.
+		if idx >= hg.Bounds.NumRows()-1 {
 			split = append(split, ranges...)
 			break
 		}
-		// Get the corresponding upper bound.
-		if idx%2 == 0 {
+		// Treat the first bucket's lower bound as -inf, so just advance to the next lower bound.
+		if idx == 0 {
+			idx = 2
+		}
+		// Get the next lower bound.
+		if idx%2 == 1 {
 			idx++
 		}
-		upperBound := hg.Bounds.GetRow(idx)
+		lowerBound := hg.Bounds.GetRow(idx)
 		var i int
-		// Find the first range that need to be split by the upper bound.
+		// Find the first range that needs to be split by the lower bound.
 		for ; i < len(ranges); i++ {
-			if chunk.Compare(upperBound, 0, &ranges[i].HighVal[0]) < 0 {
+			if chunk.Compare(lowerBound, 0, &ranges[i].HighVal[0]) <= 0 {
 				break
 			}
 		}
@@ -475,17 +477,20 @@ func (hg *Histogram) SplitRange(sc *stmtctx.StatementContext, oldRanges []*range
 		if len(ranges) == 0 {
 			break
 		}
-		// Split according to the upper bound.
-		cmp := chunk.Compare(upperBound, 0, &ranges[0].LowVal[0])
-		if cmp > 0 || (cmp == 0 && !ranges[0].LowExclude) {
-			upper := upperBound.GetDatum(0, hg.Tp)
-			split = append(split, &ranger.Range{
+		// Split according to the lower bound.
+		cmp := chunk.Compare(lowerBound, 0, &ranges[0].LowVal[0])
+		if cmp > 0 {
+			lower := lowerBound.GetDatum(0, hg.Tp)
+			newRange := &ranger.Range{
 				LowExclude:  ranges[0].LowExclude,
 				LowVal:      []types.Datum{ranges[0].LowVal[0]},
-				HighVal:     []types.Datum{upper},
-				HighExclude: false})
-			ranges[0].LowVal[0] = upper
-			ranges[0].LowExclude = true
+				HighVal:     []types.Datum{lower},
+				HighExclude: true}
+			if validRange(sc, newRange, encoded) {
+				split = append(split, newRange)
+			}
+			ranges[0].LowVal[0] = lower
+			ranges[0].LowExclude = false
 			if !validRange(sc, ranges[0], encoded) {
 				ranges = ranges[1:]
 			}

diff --git a/statistics/histogram_test.go b/statistics/histogram_test.go
@@ -50,11 +50,9 @@ func (s *testStatisticsSuite) TestNewHistogramBySelectivity(c *C) {
 	node.Ranges = append(node.Ranges, &ranger.Range{LowVal: types.MakeDatums(25), HighVal: []types.Datum{types.MaxValueDatum()}})
 	intColResult := `column:1 ndv:16 totColSize:0
 num: 30 lower_bound: 0 upper_bound: 2 repeats: 10
-num: 10 lower_bound: 3 upper_bound: 5 repeats: 10
-num: 20 lower_bound: 6 upper_bound: 8 repeats: 10
-num: 20 lower_bound: 9 upper_bound: 11 repeats: 0
+num: 20 lower_bound: 6 upper_bound: 8 repeats: 0
+num: 30 lower_bound: 9 upper_bound: 11 repeats: 0
 num: 10 lower_bound: 12 upper_bound: 14 repeats: 0
-num: 20 lower_bound: 24 upper_bound: 26 repeats: 10
 num: 30 lower_bound: 27 upper_bound: 29 repeats: 0`
 
 	stringCol := &Column{}
@@ -85,9 +83,9 @@ num: 30 lower_bound: 27 upper_bound: 29 repeats: 0`
 	node2.Ranges = append(node2.Ranges, &ranger.Range{LowVal: types.MakeDatums("ggg"), HighVal: []types.Datum{types.MaxValueDatum()}})
 	stringColResult := `column:2 ndv:9 totColSize:0
 num: 60 lower_bound: a upper_bound: aaaabbbb repeats: 0
-num: 60 lower_bound: bbbb upper_bound: fdsfdsfds repeats: 20
-num: 60 lower_bound: kkkkk upper_bound: ooooo repeats: 20
-num: 60 lower_bound: oooooo upper_bound: sssss repeats: 20
+num: 52 lower_bound: bbbb upper_bound: fdsfdsfds repeats: 0
+num: 54 lower_bound: kkkkk upper_bound: ooooo repeats: 0
+num: 60 lower_bound: oooooo upper_bound: sssss repeats: 0
 num: 60 lower_bound: ssssssu upper_bound: yyyyy repeats: 0`
 
 	newColl := coll.NewHistCollBySelectivity(sc, []*StatsNode{node, node2})