diff --git a/statistics/cmsketch.go b/statistics/cmsketch.go
index 00935d0c45638..b915da6929a15 100644
--- a/statistics/cmsketch.go
+++ b/statistics/cmsketch.go
@@ -65,11 +65,10 @@ func NewCMSketch(d, w int32) *CMSketch {
 // topNHelper wraps some variables used when building cmsketch with top n.
 type topNHelper struct {
 	sampleSize    uint64
-	counter       map[hack.MutableString]uint64
-	sorted        []uint64
+	sorted        []dataCnt
 	onlyOnceItems uint64
 	sumTopN       uint64
-	lastVal       uint64
+	actualNumTop  uint32
 }
 
 func newTopNHelper(sample [][]byte, numTop uint32) *topNHelper {
@@ -77,20 +76,16 @@ func newTopNHelper(sample [][]byte, numTop uint32) *topNHelper {
 	for i := range sample {
 		counter[hack.String(sample[i])]++
 	}
-	sorted, onlyOnceItems := make([]uint64, 0, len(counter)), uint64(0)
-	for _, cnt := range counter {
-		sorted = append(sorted, cnt)
+	sorted, onlyOnceItems := make([]dataCnt, 0, len(counter)), uint64(0)
+	for key, cnt := range counter {
+		sorted = append(sorted, dataCnt{hack.Slice(string(key)), cnt})
 		if cnt == 1 {
 			onlyOnceItems++
 		}
 	}
-	sort.Slice(sorted, func(i, j int) bool {
-		return sorted[i] > sorted[j]
-	})
+	sort.SliceStable(sorted, func(i, j int) bool { return sorted[i].cnt > sorted[j].cnt })
 
 	var (
-		// last is the last element in top N index should occurres atleast `last` times.
-		last      uint64
 		sumTopN   uint64
 		sampleNDV = uint32(len(sorted))
 	)
@@ -99,18 +94,18 @@ func newTopNHelper(sample [][]byte, numTop uint32) *topNHelper {
 	// frequency of the n-th element are added to the TopN statistics. We chose
 	// 2/3 as an empirical value because the average cardinality estimation
 	// error is relatively small compared with 1/2.
-	for i := uint32(0); i < sampleNDV && i < numTop*2; i++ {
-		if i >= numTop && sorted[i]*3 < sorted[numTop-1]*2 && last != sorted[i] {
+	var actualNumTop uint32
+	for ; actualNumTop < sampleNDV && actualNumTop < numTop*2; actualNumTop++ {
+		if actualNumTop >= numTop && sorted[actualNumTop].cnt*3 < sorted[numTop-1].cnt*2 {
 			break
 		}
-		if sorted[i] == 1 {
+		if sorted[actualNumTop].cnt == 1 {
 			break
 		}
-		last = sorted[i]
-		sumTopN += sorted[i]
+		sumTopN += sorted[actualNumTop].cnt
 	}
 
-	return &topNHelper{uint64(len(sample)), counter, sorted, onlyOnceItems, sumTopN, last}
+	return &topNHelper{uint64(len(sample)), sorted, onlyOnceItems, sumTopN, actualNumTop}
 }
 
 // NewCMSketchWithTopN returns a new CM sketch with TopN elements, the estimate NDV and the scale ratio.
@@ -130,22 +125,23 @@ func buildCMSWithTopN(helper *topNHelper, d, w int32, scaleRatio uint64, default
 	enableTopN := helper.sampleSize/topNThreshold <= helper.sumTopN
 	if enableTopN {
 		c.topN = make(map[uint64][]*TopNMeta)
+		for i := uint32(0); i < helper.actualNumTop; i++ {
+			data, cnt := helper.sorted[i].data, helper.sorted[i].cnt
+			h1, h2 := murmur3.Sum128(data)
+			c.topN[h1] = append(c.topN[h1], &TopNMeta{h2, data, cnt * scaleRatio})
+		}
+		helper.sorted = helper.sorted[helper.actualNumTop:]
 	}
 	c.defaultValue = defaultVal
-	for counterKey, cnt := range helper.counter {
-		data := hack.Slice(string(counterKey))
+	for i := range helper.sorted {
+		data, cnt := helper.sorted[i].data, helper.sorted[i].cnt
 		// If the value only occurred once in the sample, we assumes that there is no difference with
 		// value that does not occurred in the sample.
 		rowCount := defaultVal
 		if cnt > 1 {
 			rowCount = cnt * scaleRatio
 		}
-		if enableTopN && cnt >= helper.lastVal {
-			h1, h2 := murmur3.Sum128(data)
-			c.topN[h1] = append(c.topN[h1], &TopNMeta{h2, data, rowCount})
-		} else {
-			c.insertBytesByCount(data, rowCount)
-		}
+		c.insertBytesByCount(data, rowCount)
 	}
 	return
 }
diff --git a/statistics/cmsketch_test.go b/statistics/cmsketch_test.go
index 15decc68384c2..1f3d130f9e909 100644
--- a/statistics/cmsketch_test.go
+++ b/statistics/cmsketch_test.go
@@ -194,6 +194,7 @@ func (s *testStatisticsSuite) TestCMSketchTopN(c *C) {
 	for _, t := range tests {
 		lSketch, lMap, err := buildCMSketchTopNAndMap(d, w, 20, 1000, 0, total, imax, t.zipfFactor)
 		c.Check(err, IsNil)
+		c.Assert(len(lSketch.TopN()), LessEqual, 40)
 		avg, err := averageAbsoluteError(lSketch, lMap)
 		c.Assert(err, IsNil)
 		c.Check(avg, LessEqual, t.avgError)
diff --git a/statistics/sample.go b/statistics/sample.go
index 96cc22dead8a8..58460b003d196 100644
--- a/statistics/sample.go
+++ b/statistics/sample.go
@@ -25,7 +25,6 @@ import (
 	"github.com/pingcap/tidb/sessionctx/stmtctx"
 	"github.com/pingcap/tidb/types"
 	"github.com/pingcap/tidb/util/chunk"
-	"github.com/pingcap/tidb/util/hack"
 	"github.com/pingcap/tidb/util/sqlexec"
 	"github.com/pingcap/tipb/go-tipb"
 	"github.com/spaolacci/murmur3"
@@ -272,19 +271,13 @@ func (c *SampleCollector) ExtractTopN(numTop uint32) {
 	helper := newTopNHelper(values, numTop)
 	cms := c.CMSketch
 	cms.topN = make(map[uint64][]*TopNMeta)
-	dataCnts := make([]dataCnt, 0, len(helper.counter))
-	for key, cnt := range helper.counter {
-		if cnt >= helper.lastVal {
-			dataCnts = append(dataCnts, dataCnt{hack.Slice(string(key)), cnt})
-		}
-	}
-	// Sort them decreasingly so we can handle most frequent values first and reduce the probability of hash collision
+	// Process them in decreasing order of count so that the most frequent values are handled first, reducing the probability of hash collision
 	// by small values.
-	sort.SliceStable(dataCnts, func(i, j int) bool { return dataCnts[i].cnt >= dataCnts[j].cnt })
-	for _, dc := range dataCnts {
-		h1, h2 := murmur3.Sum128(dc.data)
+	for i := uint32(0); i < helper.actualNumTop; i++ {
+		data := helper.sorted[i].data
+		h1, h2 := murmur3.Sum128(data)
 		realCnt := cms.queryHashValue(h1, h2)
 		cms.subValue(h1, h2, realCnt)
-		cms.topN[h1] = append(cms.topN[h1], &TopNMeta{h2, dc.data, realCnt})
+		cms.topN[h1] = append(cms.topN[h1], &TopNMeta{h2, data, realCnt})
 	}
 }