diff --git a/statistics/analyze_jobs_test.go b/statistics/analyze_jobs_test.go index 0c91672ea9bc6..7086d62667b67 100644 --- a/statistics/analyze_jobs_test.go +++ b/statistics/analyze_jobs_test.go @@ -31,4 +31,6 @@ func (s *testStatisticsSuite) TestMoveToHistory(c *C) { MoveToHistory(jobs[i]) } c.Assert(len(GetAllAnalyzeJobs()), Equals, numMaxHistoryJobs) + ClearHistoryJobs() + c.Assert(len(GetAllAnalyzeJobs()), Equals, 0) } diff --git a/statistics/cmsketch.go b/statistics/cmsketch.go index 727049675b960..95acef15a5c37 100644 --- a/statistics/cmsketch.go +++ b/statistics/cmsketch.go @@ -17,6 +17,7 @@ import ( "bytes" "fmt" "math" + "reflect" "sort" "github.com/cznic/mathutil" @@ -418,34 +419,7 @@ func (c *CMSketch) TotalCount() uint64 { // Equal tests if two CM Sketch equal, it is only used for test. func (c *CMSketch) Equal(rc *CMSketch) bool { - if c == nil || rc == nil { - return c == nil && rc == nil - } - if c.width != rc.width || c.depth != rc.depth || c.count != rc.count || c.defaultValue != rc.defaultValue { - return false - } - for i := range c.table { - for j := range c.table[i] { - if c.table[i][j] != rc.table[i][j] { - return false - } - } - } - if len(c.topN) != len(rc.topN) { - return false - } - for h1, topNData := range c.topN { - if len(topNData) != len(rc.topN[h1]) { - return false - } - for _, val := range topNData { - meta := rc.findTopNMeta(h1, val.h2, val.Data) - if meta == nil || meta.Count != val.Count { - return false - } - } - } - return true + return reflect.DeepEqual(c, rc) } // Copy makes a copy for current CMSketch. diff --git a/statistics/cmsketch_test.go b/statistics/cmsketch_test.go index 44ee8a57dfed6..7a3750cc2f03a 100644 --- a/statistics/cmsketch_test.go +++ b/statistics/cmsketch_test.go @@ -49,12 +49,13 @@ func prepareCMSWithTopN(d, w int32, vals []*types.Datum, n uint32, total uint64) return cms, nil } -func buildCMSketchAndMap(d, w int32, seed int64, total, imax uint64, s float64) (*CMSketch, map[int64]uint32, error) { +// buildCMSketchAndMapWithOffset builds cm sketch using zipf and the generated values starts from `offset`. +func buildCMSketchAndMapWithOffset(d, w int32, seed int64, total, imax uint64, s float64, offset int64) (*CMSketch, map[int64]uint32, error) { cms := NewCMSketch(d, w) mp := make(map[int64]uint32) zipf := rand.NewZipf(rand.New(rand.NewSource(seed)), s, 1, imax) for i := uint64(0); i < total; i++ { - val := types.NewIntDatum(int64(zipf.Uint64())) + val := types.NewIntDatum(int64(zipf.Uint64()) + offset) err := cms.insert(&val) if err != nil { return nil, nil, errors.Trace(err) @@ -64,6 +65,10 @@ func buildCMSketchAndMap(d, w int32, seed int64, total, imax uint64, s float64) return cms, mp, nil } +func buildCMSketchAndMap(d, w int32, seed int64, total, imax uint64, s float64) (*CMSketch, map[int64]uint32, error) { + return buildCMSketchAndMapWithOffset(d, w, seed, total, imax, s, 0) +} + func buildCMSketchTopNAndMap(d, w, n, sample int32, seed int64, total, imax uint64, s float64) (*CMSketch, map[int64]uint32, error) { mp := make(map[int64]uint32) zipf := rand.NewZipf(rand.New(rand.NewSource(seed)), s, 1, imax) @@ -195,6 +200,56 @@ func (s *testStatisticsSuite) TestCMSketchTopN(c *C) { } } +func (s *testStatisticsSuite) TestMergeCMSketch4IncrementalAnalyze(c *C) { + tests := []struct { + zipfFactor float64 + avgError uint64 + }{ + { + zipfFactor: 1.0000001, + avgError: 48, + }, + { + zipfFactor: 1.1, + avgError: 48, + }, + { + zipfFactor: 2, + avgError: 128, + }, + { + zipfFactor: 5, + avgError: 256, + }, + } + d, w := int32(5), int32(2048) + total, imax := uint64(100000), uint64(1000000) + for _, t := range tests { + lSketch, lMap, err := buildCMSketchAndMap(d, w, 0, total, imax, t.zipfFactor) + c.Check(err, IsNil) + avg, err := averageAbsoluteError(lSketch, lMap) + c.Assert(err, IsNil) + c.Check(avg, LessEqual, t.avgError) + + rSketch, rMap, err := buildCMSketchAndMapWithOffset(d, w, 1, total, imax, t.zipfFactor, int64(imax)) + c.Check(err, IsNil) + avg, err = averageAbsoluteError(rSketch, rMap) + c.Assert(err, IsNil) + c.Check(avg, LessEqual, t.avgError) + + for key, val := range rMap { + lMap[key] += val + } + lSketch.MergeCMSketch4IncrementalAnalyze(rSketch) + avg, err = averageAbsoluteError(lSketch, lMap) + c.Assert(err, IsNil) + c.Check(avg, LessEqual, t.avgError) + width, depth := lSketch.GetWidthAndDepth() + c.Assert(width, Equals, int32(2048)) + c.Assert(depth, Equals, int32(5)) + } +} + func (s *testStatisticsSuite) TestCMSketchTopNUniqueData(c *C) { d, w := int32(5), int32(2048) total := uint64(1000000) diff --git a/statistics/estimate.go b/statistics/estimate.go index 2112a8fc3767c..952034186bdb8 100644 --- a/statistics/estimate.go +++ b/statistics/estimate.go @@ -15,6 +15,8 @@ package statistics import ( "math" + + "github.com/cznic/mathutil" ) // calculateEstimateNDV calculates the estimate ndv of a sampled data from a multisize with size total. @@ -42,12 +44,7 @@ func calculateEstimateNDV(h *topNHelper, rowCount uint64) (ndv uint64, scaleRati d := float64(sampleNDV) ndv = uint64(math.Sqrt(N/n)*f1 + d - f1 + 0.5) - - if ndv < sampleNDV { - ndv = sampleNDV - } - if ndv > rowCount { - ndv = rowCount - } + ndv = mathutil.MaxUint64(ndv, sampleNDV) + ndv = mathutil.MinUint64(ndv, rowCount) return ndv, scaleRatio } diff --git a/statistics/feedback.go b/statistics/feedback.go index 83a7a0cec0068..fe6c9e75802c2 100644 --- a/statistics/feedback.go +++ b/statistics/feedback.go @@ -366,9 +366,7 @@ func (b *BucketFeedback) getBoundaries(num int) []types.Datum { err := types.SortDatums(nil, vals) if err != nil { logutil.Logger(context.Background()).Debug("sort datums failed", zap.Error(err)) - vals = vals[:0] - vals = append(vals, *b.lower, *b.upper) - return vals + return []types.Datum{*b.lower, *b.upper} } total, interval := 0, len(vals)/num // Pick values per `interval`. @@ -772,10 +770,7 @@ func EncodeFeedback(q *QueryFeedback) ([]byte, error) { var buf bytes.Buffer enc := gob.NewEncoder(&buf) err = enc.Encode(pb) - if err != nil { - return nil, errors.Trace(err) - } - return buf.Bytes(), nil + return buf.Bytes(), errors.Trace(err) } func decodeFeedbackForIndex(q *QueryFeedback, pb *queryFeedback, c *CMSketch) { @@ -865,41 +860,9 @@ func DecodeFeedback(val []byte, q *QueryFeedback, c *CMSketch, ft *types.FieldTy } else if len(pb.IntRanges) > 0 { decodeFeedbackForPK(q, pb, mysql.HasUnsignedFlag(ft.Flag)) } else { - err := decodeFeedbackForColumn(q, pb, ft) - if err != nil { - return errors.Trace(err) - } - } - return nil -} - -// Equal tests if two query feedback equal, it is only used in test. -func (q *QueryFeedback) Equal(rq *QueryFeedback) bool { - if len(q.Feedback) != len(rq.Feedback) { - return false + err = decodeFeedbackForColumn(q, pb, ft) } - for i, fb := range q.Feedback { - rfb := rq.Feedback[i] - if fb.Count != rfb.Count { - return false - } - if fb.Lower.Kind() == types.KindInt64 { - if fb.Lower.GetInt64() != rfb.Lower.GetInt64() { - return false - } - if fb.Upper.GetInt64() != rfb.Upper.GetInt64() { - return false - } - } else { - if !bytes.Equal(fb.Lower.GetBytes(), rfb.Lower.GetBytes()) { - return false - } - if !bytes.Equal(fb.Upper.GetBytes(), rfb.Upper.GetBytes()) { - return false - } - } - } - return true + return err } // SplitFeedbackByQueryType splits the feedbacks into equality feedbacks and range feedbacks. @@ -947,9 +910,8 @@ func SupportColumnType(ft *types.FieldType) bool { mysql.TypeDouble, mysql.TypeString, mysql.TypeVarString, mysql.TypeVarchar, mysql.TypeBlob, mysql.TypeTinyBlob, mysql.TypeMediumBlob, mysql.TypeLongBlob, mysql.TypeNewDecimal, mysql.TypeDuration, mysql.TypeDate, mysql.TypeDatetime, mysql.TypeTimestamp: return true - default: - return false } + return false } // GetMaxValue returns the max value datum for each type. diff --git a/statistics/feedback_test.go b/statistics/feedback_test.go index 4bec1e93ce2ad..4a056ac1551d0 100644 --- a/statistics/feedback_test.go +++ b/statistics/feedback_test.go @@ -14,6 +14,8 @@ package statistics import ( + "bytes" + . "github.com/pingcap/check" "github.com/pingcap/parser/mysql" "github.com/pingcap/tidb/types" @@ -256,3 +258,32 @@ func (s *testFeedbackSuite) TestFeedbackEncoding(c *C) { q.Feedback = q.Feedback[:1] c.Assert(q.Equal(rq), IsTrue) } + +// Equal tests if two query feedback equal, it is only used in test. +func (q *QueryFeedback) Equal(rq *QueryFeedback) bool { + if len(q.Feedback) != len(rq.Feedback) { + return false + } + for i, fb := range q.Feedback { + rfb := rq.Feedback[i] + if fb.Count != rfb.Count { + return false + } + if fb.Lower.Kind() == types.KindInt64 { + if fb.Lower.GetInt64() != rfb.Lower.GetInt64() { + return false + } + if fb.Upper.GetInt64() != rfb.Upper.GetInt64() { + return false + } + } else { + if !bytes.Equal(fb.Lower.GetBytes(), rfb.Lower.GetBytes()) { + return false + } + if !bytes.Equal(fb.Upper.GetBytes(), rfb.Upper.GetBytes()) { + return false + } + } + } + return true +} diff --git a/statistics/handle/ddl.go b/statistics/handle/ddl.go index 14f6baec5f412..454fab7b93987 100644 --- a/statistics/handle/ddl.go +++ b/statistics/handle/ddl.go @@ -82,23 +82,15 @@ func (h *Handle) insertTableStats2KV(info *model.TableInfo, physicalID int64) (e return errors.Trace(err) } startTS := txn.StartTS() - _, err = exec.Execute(context.Background(), fmt.Sprintf("insert into mysql.stats_meta (version, table_id) values(%d, %d)", startTS, physicalID)) - if err != nil { - return - } + sqls := make([]string, 0, 1+len(info.Columns)+len(info.Indices)) + sqls = append(sqls, fmt.Sprintf("insert into mysql.stats_meta (version, table_id) values(%d, %d)", startTS, physicalID)) for _, col := range info.Columns { - _, err = exec.Execute(context.Background(), fmt.Sprintf("insert into mysql.stats_histograms (table_id, is_index, hist_id, distinct_count, version) values(%d, 0, %d, 0, %d)", physicalID, col.ID, startTS)) - if err != nil { - return - } + sqls = append(sqls, fmt.Sprintf("insert into mysql.stats_histograms (table_id, is_index, hist_id, distinct_count, version) values(%d, 0, %d, 0, %d)", physicalID, col.ID, startTS)) } for _, idx := range info.Indices { - _, err = exec.Execute(context.Background(), fmt.Sprintf("insert into mysql.stats_histograms (table_id, is_index, hist_id, distinct_count, version) values(%d, 1, %d, 0, %d)", physicalID, idx.ID, startTS)) - if err != nil { - return - } + sqls = append(sqls, fmt.Sprintf("insert into mysql.stats_histograms (table_id, is_index, hist_id, distinct_count, version) values(%d, 1, %d, 0, %d)", physicalID, idx.ID, startTS)) } - return + return execSQLs(context.Background(), exec, sqls) } // insertColStats2KV insert a record to stats_histograms with distinct_count 1 and insert a bucket to stats_buckets with default value. @@ -148,28 +140,21 @@ func (h *Handle) insertColStats2KV(physicalID int64, colInfo *model.ColumnInfo) if err != nil { return } + sqls := make([]string, 0, 1) if value.IsNull() { // If the adding column has default value null, all the existing rows have null value on the newly added column. - _, err = exec.Execute(ctx, fmt.Sprintf("insert into mysql.stats_histograms (version, table_id, is_index, hist_id, distinct_count, null_count) values (%d, %d, 0, %d, 0, %d)", startTS, physicalID, colInfo.ID, count)) - if err != nil { - return - } + sqls = append(sqls, fmt.Sprintf("insert into mysql.stats_histograms (version, table_id, is_index, hist_id, distinct_count, null_count) values (%d, %d, 0, %d, 0, %d)", startTS, physicalID, colInfo.ID, count)) } else { // If this stats exists, we insert histogram meta first, the distinct_count will always be one. - _, err = exec.Execute(ctx, fmt.Sprintf("insert into mysql.stats_histograms (version, table_id, is_index, hist_id, distinct_count, tot_col_size) values (%d, %d, 0, %d, 1, %d)", startTS, physicalID, colInfo.ID, int64(len(value.GetBytes()))*count)) - if err != nil { - return - } + sqls = append(sqls, fmt.Sprintf("insert into mysql.stats_histograms (version, table_id, is_index, hist_id, distinct_count, tot_col_size) values (%d, %d, 0, %d, 1, %d)", startTS, physicalID, colInfo.ID, int64(len(value.GetBytes()))*count)) value, err = value.ConvertTo(h.mu.ctx.GetSessionVars().StmtCtx, types.NewFieldType(mysql.TypeBlob)) if err != nil { return } // There must be only one bucket for this new column and the value is the default value. - _, err = exec.Execute(ctx, fmt.Sprintf("insert into mysql.stats_buckets (table_id, is_index, hist_id, bucket_id, repeats, count, lower_bound, upper_bound) values (%d, 0, %d, 0, %d, %d, X'%X', X'%X')", physicalID, colInfo.ID, count, count, value.GetBytes(), value.GetBytes())) - if err != nil { - return - } + sqls = append(sqls, fmt.Sprintf("insert into mysql.stats_buckets (table_id, is_index, hist_id, bucket_id, repeats, count, lower_bound, upper_bound) values (%d, 0, %d, 0, %d, %d, X'%X', X'%X')", physicalID, colInfo.ID, count, count, value.GetBytes(), value.GetBytes())) } + return execSQLs(context.Background(), exec, sqls) } return } @@ -184,3 +169,13 @@ func finishTransaction(ctx context.Context, exec sqlexec.SQLExecutor, err error) } return errors.Trace(err) } + +func execSQLs(ctx context.Context, exec sqlexec.SQLExecutor, sqls []string) error { + for _, sql := range sqls { + _, err := exec.Execute(ctx, sql) + if err != nil { + return err + } + } + return nil +} diff --git a/statistics/handle/dump.go b/statistics/handle/dump.go index c9edb9d8eca5a..16295569d76c0 100644 --- a/statistics/handle/dump.go +++ b/statistics/handle/dump.go @@ -168,10 +168,7 @@ func (h *Handle) loadStatsFromJSON(tableInfo *model.TableInfo, physicalID int64, } } err = h.SaveMetaToStorage(tbl.PhysicalID, tbl.Count, tbl.ModifyCount) - if err != nil { - return errors.Trace(err) - } - return nil + return err } // TableStatsFromJSON loads statistic from JSONTable and return the Table of statistic. diff --git a/statistics/handle/gc.go b/statistics/handle/gc.go index 0cc79e89667e7..af05d3b114270 100644 --- a/statistics/handle/gc.go +++ b/statistics/handle/gc.go @@ -112,24 +112,16 @@ func (h *Handle) deleteHistStatsFromKV(physicalID int64, histID int64, isIndex i return errors.Trace(err) } startTS := txn.StartTS() + sqls := make([]string, 0, 4) // First of all, we update the version. If this table doesn't exist, it won't have any problem. Because we cannot delete anything. - _, err = exec.Execute(context.Background(), fmt.Sprintf("update mysql.stats_meta set version = %d where table_id = %d ", startTS, physicalID)) - if err != nil { - return - } + sqls = append(sqls, fmt.Sprintf("update mysql.stats_meta set version = %d where table_id = %d ", startTS, physicalID)) // delete histogram meta - _, err = exec.Execute(context.Background(), fmt.Sprintf("delete from mysql.stats_histograms where table_id = %d and hist_id = %d and is_index = %d", physicalID, histID, isIndex)) - if err != nil { - return - } + sqls = append(sqls, fmt.Sprintf("delete from mysql.stats_histograms where table_id = %d and hist_id = %d and is_index = %d", physicalID, histID, isIndex)) // delete top n data - _, err = exec.Execute(context.Background(), fmt.Sprintf("delete from mysql.stats_top_n where table_id = %d and hist_id = %d and is_index = %d", physicalID, histID, isIndex)) - if err != nil { - return - } + sqls = append(sqls, fmt.Sprintf("delete from mysql.stats_top_n where table_id = %d and hist_id = %d and is_index = %d", physicalID, histID, isIndex)) // delete all buckets - _, err = exec.Execute(context.Background(), fmt.Sprintf("delete from mysql.stats_buckets where table_id = %d and hist_id = %d and is_index = %d", physicalID, histID, isIndex)) - return + sqls = append(sqls, fmt.Sprintf("delete from mysql.stats_buckets where table_id = %d and hist_id = %d and is_index = %d", physicalID, histID, isIndex)) + return execSQLs(context.Background(), exec, sqls) } // DeleteTableStatsFromKV deletes table statistics from kv. @@ -149,16 +141,10 @@ func (h *Handle) DeleteTableStatsFromKV(physicalID int64) (err error) { return errors.Trace(err) } startTS := txn.StartTS() + sqls := make([]string, 0, 3) // We only update the version so that other tidb will know that this table is deleted. - sql := fmt.Sprintf("update mysql.stats_meta set version = %d where table_id = %d ", startTS, physicalID) - _, err = exec.Execute(context.Background(), sql) - if err != nil { - return - } - _, err = exec.Execute(context.Background(), fmt.Sprintf("delete from mysql.stats_histograms where table_id = %d", physicalID)) - if err != nil { - return - } - _, err = exec.Execute(context.Background(), fmt.Sprintf("delete from mysql.stats_buckets where table_id = %d", physicalID)) - return + sqls = append(sqls, fmt.Sprintf("update mysql.stats_meta set version = %d where table_id = %d ", startTS, physicalID)) + sqls = append(sqls, fmt.Sprintf("delete from mysql.stats_histograms where table_id = %d", physicalID)) + sqls = append(sqls, fmt.Sprintf("delete from mysql.stats_buckets where table_id = %d", physicalID)) + return execSQLs(context.Background(), exec, sqls) } diff --git a/statistics/handle/gc_test.go b/statistics/handle/gc_test.go index bfb5b0a90ffe7..e84cc38816dfb 100644 --- a/statistics/handle/gc_test.go +++ b/statistics/handle/gc_test.go @@ -25,20 +25,21 @@ func (s *testStatsSuite) TestGCStats(c *C) { defer cleanEnv(c, s.store, s.do) testKit := testkit.NewTestKit(c, s.store) testKit.MustExec("use test") - testKit.MustExec("create table t(a int, b int, index idx(a, b))") + testKit.MustExec("create table t(a int, b int, index idx(a, b), index idx_a(a))") testKit.MustExec("insert into t values (1,1),(2,2),(3,3)") testKit.MustExec("analyze table t") testKit.MustExec("alter table t drop index idx") - testKit.MustQuery("select count(*) from mysql.stats_histograms").Check(testkit.Rows("3")) - testKit.MustQuery("select count(*) from mysql.stats_buckets").Check(testkit.Rows("9")) + testKit.MustQuery("select count(*) from mysql.stats_histograms").Check(testkit.Rows("4")) + testKit.MustQuery("select count(*) from mysql.stats_buckets").Check(testkit.Rows("12")) h := s.do.StatsHandle() h.SetLastUpdateVersion(math.MaxUint64) ddlLease := time.Duration(0) c.Assert(h.GCStats(s.do.InfoSchema(), ddlLease), IsNil) - testKit.MustQuery("select count(*) from mysql.stats_histograms").Check(testkit.Rows("2")) - testKit.MustQuery("select count(*) from mysql.stats_buckets").Check(testkit.Rows("6")) + testKit.MustQuery("select count(*) from mysql.stats_histograms").Check(testkit.Rows("3")) + testKit.MustQuery("select count(*) from mysql.stats_buckets").Check(testkit.Rows("9")) + testKit.MustExec("alter table t drop index idx_a") testKit.MustExec("alter table t drop column a") c.Assert(h.GCStats(s.do.InfoSchema(), ddlLease), IsNil) testKit.MustQuery("select count(*) from mysql.stats_histograms").Check(testkit.Rows("1")) diff --git a/statistics/handle/handle.go b/statistics/handle/handle.go index 912dbed1e6865..cbc739f820c5b 100644 --- a/statistics/handle/handle.go +++ b/statistics/handle/handle.go @@ -326,11 +326,8 @@ func (h *Handle) cmSketchFromStorage(tblID int64, isIndex, histID int64, history } else { rows, _, err = h.restrictedExec.ExecRestrictedSQL(nil, selSQL) } - if err != nil { - return nil, errors.Trace(err) - } - if len(rows) == 0 { - return nil, nil + if err != nil || len(rows) == 0 { + return nil, err } return statistics.LoadCMSketchWithTopN(h.restrictedExec, tblID, isIndex, histID, rows[0].GetBytes(0)) } @@ -491,22 +488,18 @@ func (h *Handle) tableStatsFromStorage(tableInfo *model.TableInfo, physicalID in } else { rows, _, err = h.restrictedExec.ExecRestrictedSQL(nil, selSQL) } - if err != nil { - return nil, err - } // Check deleted table. - if len(rows) == 0 { + if err != nil || len(rows) == 0 { return nil, nil } for _, row := range rows { if row.GetInt64(1) > 0 { - if err := h.indexStatsFromStorage(row, table, tableInfo, historyStatsExec); err != nil { - return nil, errors.Trace(err) - } + err = h.indexStatsFromStorage(row, table, tableInfo, historyStatsExec) } else { - if err := h.columnStatsFromStorage(row, table, tableInfo, loadAll, historyStatsExec); err != nil { - return nil, errors.Trace(err) - } + err = h.columnStatsFromStorage(row, table, tableInfo, loadAll, historyStatsExec) + } + if err != nil { + return nil, err } } return table, nil @@ -531,49 +524,29 @@ func (h *Handle) SaveStatsToStorage(tableID int64, count int64, isIndex int, hg } version := txn.StartTS() - var sql string + sqls := make([]string, 0, 4) // If the count is less than 0, then we do not want to update the modify count and count. if count >= 0 { - sql = fmt.Sprintf("replace into mysql.stats_meta (version, table_id, count) values (%d, %d, %d)", version, tableID, count) + sqls = append(sqls, fmt.Sprintf("replace into mysql.stats_meta (version, table_id, count) values (%d, %d, %d)", version, tableID, count)) } else { - sql = fmt.Sprintf("update mysql.stats_meta set version = %d where table_id = %d", version, tableID) - } - _, err = exec.Execute(ctx, sql) - if err != nil { - return + sqls = append(sqls, fmt.Sprintf("update mysql.stats_meta set version = %d where table_id = %d", version, tableID)) } data, err := statistics.EncodeCMSketchWithoutTopN(cms) if err != nil { return } // Delete outdated data - deleteOutdatedTopNSQL := fmt.Sprintf("delete from mysql.stats_top_n where table_id = %d and is_index = %d and hist_id = %d", tableID, isIndex, hg.ID) - _, err = exec.Execute(ctx, deleteOutdatedTopNSQL) - if err != nil { - return - } + sqls = append(sqls, fmt.Sprintf("delete from mysql.stats_top_n where table_id = %d and is_index = %d and hist_id = %d", tableID, isIndex, hg.ID)) for _, meta := range cms.TopN() { - insertSQL := fmt.Sprintf("insert into mysql.stats_top_n (table_id, is_index, hist_id, value, count) values (%d, %d, %d, X'%X', %d)", tableID, isIndex, hg.ID, meta.Data, meta.Count) - _, err = exec.Execute(ctx, insertSQL) - if err != nil { - return - } + sqls = append(sqls, fmt.Sprintf("insert into mysql.stats_top_n (table_id, is_index, hist_id, value, count) values (%d, %d, %d, X'%X', %d)", tableID, isIndex, hg.ID, meta.Data, meta.Count)) } flag := 0 if isAnalyzed == 1 { flag = statistics.AnalyzeFlag } - replaceSQL := fmt.Sprintf("replace into mysql.stats_histograms (table_id, is_index, hist_id, distinct_count, version, null_count, cm_sketch, tot_col_size, stats_ver, flag, correlation) values (%d, %d, %d, %d, %d, %d, X'%X', %d, %d, %d, %f)", - tableID, isIndex, hg.ID, hg.NDV, version, hg.NullCount, data, hg.TotColSize, statistics.CurStatsVersion, flag, hg.Correlation) - _, err = exec.Execute(ctx, replaceSQL) - if err != nil { - return - } - deleteSQL := fmt.Sprintf("delete from mysql.stats_buckets where table_id = %d and is_index = %d and hist_id = %d", tableID, isIndex, hg.ID) - _, err = exec.Execute(ctx, deleteSQL) - if err != nil { - return - } + sqls = append(sqls, fmt.Sprintf("replace into mysql.stats_histograms (table_id, is_index, hist_id, distinct_count, version, null_count, cm_sketch, tot_col_size, stats_ver, flag, correlation) values (%d, %d, %d, %d, %d, %d, X'%X', %d, %d, %d, %f)", + tableID, isIndex, hg.ID, hg.NDV, version, hg.NullCount, data, hg.TotColSize, statistics.CurStatsVersion, flag, hg.Correlation)) + sqls = append(sqls, fmt.Sprintf("delete from mysql.stats_buckets where table_id = %d and is_index = %d and hist_id = %d", tableID, isIndex, hg.ID)) sc := h.mu.ctx.GetSessionVars().StmtCtx var lastAnalyzePos []byte for i := range hg.Buckets { @@ -594,20 +567,12 @@ func (h *Handle) SaveStatsToStorage(tableID int64, count int64, isIndex int, hg if err != nil { return } - insertSQL := fmt.Sprintf("insert into mysql.stats_buckets(table_id, is_index, hist_id, bucket_id, count, repeats, lower_bound, upper_bound) values(%d, %d, %d, %d, %d, %d, X'%X', X'%X')", tableID, isIndex, hg.ID, i, count, hg.Buckets[i].Repeat, lowerBound.GetBytes(), upperBound.GetBytes()) - _, err = exec.Execute(ctx, insertSQL) - if err != nil { - return - } + sqls = append(sqls, fmt.Sprintf("insert into mysql.stats_buckets(table_id, is_index, hist_id, bucket_id, count, repeats, lower_bound, upper_bound) values(%d, %d, %d, %d, %d, %d, X'%X', X'%X')", tableID, isIndex, hg.ID, i, count, hg.Buckets[i].Repeat, lowerBound.GetBytes(), upperBound.GetBytes())) } if isAnalyzed == 1 && len(lastAnalyzePos) > 0 { - sql = fmt.Sprintf("update mysql.stats_histograms set last_analyze_pos = X'%X' where table_id = %d and is_index = %d and hist_id = %d", lastAnalyzePos, tableID, isIndex, hg.ID) - _, err = exec.Execute(ctx, sql) - if err != nil { - return - } + sqls = append(sqls, fmt.Sprintf("update mysql.stats_histograms set last_analyze_pos = X'%X' where table_id = %d and is_index = %d and hist_id = %d", lastAnalyzePos, tableID, isIndex, hg.ID)) } - return + return execSQLs(context.Background(), exec, sqls) } // SaveMetaToStorage will save stats_meta to storage. diff --git a/statistics/handle/update.go b/statistics/handle/update.go index 5d6d13767ef1a..2241b41cdd8a3 100644 --- a/statistics/handle/update.go +++ b/statistics/handle/update.go @@ -23,6 +23,7 @@ import ( "sync" "time" + "github.com/cznic/mathutil" "github.com/pingcap/errors" "github.com/pingcap/log" "github.com/pingcap/parser/model" @@ -159,13 +160,9 @@ func (s *SessionStatsCollector) Update(id int64, delta int64, count int64, colSi } func mergeQueryFeedback(lq []*statistics.QueryFeedback, rq []*statistics.QueryFeedback) []*statistics.QueryFeedback { - for _, q := range rq { - if len(lq) >= int(MaxQueryFeedbackCount.Load()) { - break - } - lq = append(lq, q) - } - return lq + remained := mathutil.MinInt64(int64(len(rq)), MaxQueryFeedbackCount.Load()-int64(len(lq))) + remained = mathutil.MaxInt64(0, remained) + return append(lq, rq[:remained]...) } var ( @@ -337,10 +334,7 @@ func (h *Handle) dumpTableStatCountToKV(id int64, delta variable.TableDelta) (up } else { sql = fmt.Sprintf("update mysql.stats_meta set version = %d, count = count + %d, modify_count = modify_count + %d where table_id = %d", startTS, delta.Delta, delta.Count, id) } - _, err = h.mu.ctx.(sqlexec.SQLExecutor).Execute(ctx, sql) - if err != nil { - return - } + err = execSQLs(context.Background(), exec, []string{sql}) updated = h.mu.ctx.GetSessionVars().StmtCtx.AffectedRows() > 0 return } @@ -691,10 +685,7 @@ func parseAnalyzePeriod(start, end string) (time.Time, time.Time, error) { return s, s, errors.Trace(err) } e, err := time.ParseInLocation(variable.AnalyzeFullTimeFormat, end, time.UTC) - if err != nil { - return s, e, errors.Trace(err) - } - return s, e, nil + return s, e, err } // HandleAutoAnalyze analyzes the newly created table or index. @@ -746,10 +737,7 @@ func (h *Handle) autoAnalyzeTable(tblInfo *model.TableInfo, statsTbl *statistics return true } for _, idx := range tblInfo.Indices { - if idx.State != model.StatePublic { - continue - } - if _, ok := statsTbl.Indices[idx.ID]; !ok { + if _, ok := statsTbl.Indices[idx.ID]; !ok && idx.State == model.StatePublic { sql = fmt.Sprintf("%s index `%s`", sql, idx.Name.O) logutil.Logger(context.Background()).Info("[stats] auto analyze for unanalyzed", zap.String("sql", sql)) h.execAutoAnalyze(sql) @@ -944,11 +932,8 @@ func (h *Handle) RecalculateExpectCount(q *statistics.QueryFeedback) error { expected, err = c.GetColumnRowCount(sc, ranges, t.ModifyCount) expected *= c.GetIncreaseFactor(t.Count) } - if err != nil { - return errors.Trace(err) - } q.Expected = int64(expected) - return nil + return err } func (h *Handle) dumpRangeFeedback(sc *stmtctx.StatementContext, ran *ranger.Range, rangeCount float64, q *statistics.QueryFeedback) error { diff --git a/statistics/handle/update_test.go b/statistics/handle/update_test.go index 456dbad12d290..0fe42424015e1 100644 --- a/statistics/handle/update_test.go +++ b/statistics/handle/update_test.go @@ -741,7 +741,7 @@ func (s *testStatsSuite) TestQueryFeedback(c *C) { }{ { // test primary key feedback - sql: "select * from t where t.a <= 5", + sql: "select * from t where t.a <= 5 order by a desc", hist: "column:1 ndv:4 totColSize:0\n" + "num: 1 lower_bound: -9223372036854775808 upper_bound: 1 repeats: 0\n" + "num: 1 lower_bound: 2 upper_bound: 2 repeats: 1\n" + diff --git a/statistics/histogram.go b/statistics/histogram.go index 73adeee757bac..dadaa7e930990 100644 --- a/statistics/histogram.go +++ b/statistics/histogram.go @@ -221,10 +221,7 @@ func ValueToString(value *types.Datum, idxCols int) (string, error) { return "", errors.Trace(err) } str, err := types.DatumsToString(decodedVals, true) - if err != nil { - return "", errors.Trace(err) - } - return str, nil + return str, err } // BucketToString change the given bucket to string format. @@ -273,10 +270,7 @@ func (hg *Histogram) equalRowCount(value types.Datum) float64 { // greaterRowCount estimates the row count where the column greater than value. func (hg *Histogram) greaterRowCount(value types.Datum) float64 { gtCount := hg.notNullCount() - hg.lessRowCount(value) - hg.equalRowCount(value) - if gtCount < 0 { - gtCount = 0 - } - return gtCount + return math.Max(0, gtCount) } // LessRowCountWithBktIdx estimates the row count where the column less than value. @@ -595,9 +589,7 @@ func (hg *Histogram) AvgCountPerNotNullValue(totalCount int64) float64 { factor := hg.GetIncreaseFactor(totalCount) totalNotNull := hg.notNullCount() * factor curNDV := float64(hg.NDV) * factor - if curNDV == 0 { - curNDV = 1 - } + curNDV = math.Max(curNDV, 1) return totalNotNull / curNDV } diff --git a/statistics/histogram_test.go b/statistics/histogram_test.go index e131b143e7306..1ce1682076217 100644 --- a/statistics/histogram_test.go +++ b/statistics/histogram_test.go @@ -95,6 +95,7 @@ num: 60 lower_bound: ssssssu upper_bound: yyyyy repeats: 0` c.Assert(newColl.Columns[2].String(), Equals, stringColResult) idx := &Index{Info: &model.IndexInfo{Columns: []*model.IndexColumn{{Name: model.NewCIStr("a"), Offset: 0}}}} + coll.Indices[0] = idx idx.Histogram = *NewHistogram(0, 15, 0, 0, types.NewFieldType(mysql.TypeBlob), 0, 0) for i := 0; i < 5; i++ { low, err1 := codec.EncodeKey(sc, nil, types.NewIntDatum(int64(i*3))) @@ -116,7 +117,16 @@ num: 30 lower_bound: 3 upper_bound: 5 repeats: 10 num: 30 lower_bound: 9 upper_bound: 11 repeats: 10 num: 30 lower_bound: 12 upper_bound: 14 repeats: 10` - newIdx, err := idx.newIndexBySelectivity(sc, node3) - c.Assert(err, IsNil, Commentf("Test failed: %v", err)) - c.Assert(newIdx.String(), Equals, idxResult) + newColl = coll.NewHistCollBySelectivity(sc, []*StatsNode{node3}) + c.Assert(newColl.Indices[0].String(), Equals, idxResult) +} + +func (s *testStatisticsSuite) TestTruncateHistogram(c *C) { + hist := NewHistogram(0, 0, 0, 0, types.NewFieldType(mysql.TypeLonglong), 1, 0) + low, high := types.NewIntDatum(0), types.NewIntDatum(1) + hist.AppendBucket(&low, &high, 0, 1) + newHist := hist.TruncateHistogram(1) + c.Assert(HistogramEqual(hist, newHist, true), IsTrue) + newHist = hist.TruncateHistogram(0) + c.Assert(newHist.Len(), Equals, 0) } diff --git a/statistics/selectivity_test.go b/statistics/selectivity_test.go index efe34266f3c5d..ad0918a85ba8c 100644 --- a/statistics/selectivity_test.go +++ b/statistics/selectivity_test.go @@ -229,6 +229,10 @@ func (s *testStatsSuite) TestSelectivity(c *C) { statsTbl := s.prepareSelectivity(testKit, c) is := s.do.InfoSchema() + longExpr := "0 < a and a = 1 " + for i := 1; i < 64; i++ { + longExpr += fmt.Sprintf(" and a > %d ", i) + } tests := []struct { exprs string selectivity float64 @@ -265,6 +269,10 @@ func (s *testStatsSuite) TestSelectivity(c *C) { exprs: "a > 1 and b < 2 and c > 3 and d < 4 and e > 5", selectivity: 0, }, + { + exprs: longExpr, + selectivity: 0.001, + }, } for _, tt := range tests { sql := "select * from t where " + tt.exprs