diff --git a/cmd/explaintest/r/explain_easy.result b/cmd/explaintest/r/explain_easy.result index b1f9ba92be976..dcb09bf7de748 100644 --- a/cmd/explaintest/r/explain_easy.result +++ b/cmd/explaintest/r/explain_easy.result @@ -79,9 +79,9 @@ TopN_7 1.00 root test.t2.c2:asc, offset:0, count:1 └─TableScan_13 10000.00 cop[tikv] table:t2, range:[-inf,+inf], keep order:false, stats:pseudo explain select * from t1 where c1 > 1 and c2 = 1 and c3 < 1; id count task operator info -IndexLookUp_11 1.11 root +IndexLookUp_11 11.08 root ├─IndexScan_8 33.33 cop[tikv] table:t1, index:c2, range:(1 1,1 +inf], keep order:false, stats:pseudo -└─Selection_10 1.11 cop[tikv] lt(test.t1.c3, 1) +└─Selection_10 11.08 cop[tikv] lt(test.t1.c3, 1) └─TableScan_9 33.33 cop[tikv] table:t1, keep order:false, stats:pseudo explain select * from t1 where c1 = 1 and c2 > 1; id count task operator info diff --git a/planner/core/exhaust_physical_plans.go b/planner/core/exhaust_physical_plans.go index 4da7696e17949..4d3b73d69c18c 100644 --- a/planner/core/exhaust_physical_plans.go +++ b/planner/core/exhaust_physical_plans.go @@ -26,6 +26,7 @@ import ( "github.com/pingcap/tidb/expression/aggregation" "github.com/pingcap/tidb/kv" "github.com/pingcap/tidb/planner/property" + "github.com/pingcap/tidb/planner/util" "github.com/pingcap/tidb/sessionctx" "github.com/pingcap/tidb/types" "github.com/pingcap/tidb/util/chunk" @@ -300,7 +301,7 @@ func (p *LogicalJoin) constructIndexJoin( innerTask task, ranges []*ranger.Range, keyOff2IdxOff []int, - path *accessPath, + path *util.AccessPath, compareFilters *ColWithCmpFuncManager, ) []PhysicalPlan { joinType := p.JoinType @@ -353,7 +354,7 @@ func (p *LogicalJoin) constructIndexJoin( CompareFilters: compareFilters, }.Init(p.ctx, p.stats.ScaleByExpectCnt(prop.ExpectedCnt), p.blockOffset, chReqProps...) if path != nil { - join.IdxColLens = path.idxColLens + join.IdxColLens = path.IdxColLens } join.SetSchema(p.schema) return []PhysicalPlan{join} @@ -365,7 +366,7 @@ func (p *LogicalJoin) constructIndexMergeJoin( innerTask task, ranges []*ranger.Range, keyOff2IdxOff []int, - path *accessPath, + path *util.AccessPath, compareFilters *ColWithCmpFuncManager, ) []PhysicalPlan { indexJoins := p.constructIndexJoin(prop, outerIdx, innerTask, ranges, keyOff2IdxOff, path, compareFilters) @@ -442,7 +443,7 @@ func (p *LogicalJoin) constructIndexHashJoin( innerTask task, ranges []*ranger.Range, keyOff2IdxOff []int, - path *accessPath, + path *util.AccessPath, compareFilters *ColWithCmpFuncManager, ) []PhysicalPlan { indexJoins := p.constructIndexJoin(prop, outerIdx, innerTask, ranges, keyOff2IdxOff, path, compareFilters) @@ -519,9 +520,9 @@ func (p *LogicalJoin) getIndexJoinByOuterIdx(prop *property.PhysicalProperty, ou func (p *LogicalJoin) buildIndexJoinInner2TableScan( prop *property.PhysicalProperty, ds *DataSource, innerJoinKeys, outerJoinKeys []*expression.Column, outerIdx int, us *LogicalUnionScan, avgInnerRowCnt float64) (joins []PhysicalPlan) { - var tblPath *accessPath + var tblPath *util.AccessPath for _, path := range ds.possibleAccessPaths { - if path.isTablePath && path.storeType == kv.TiKV { + if path.IsTablePath && path.StoreType == kv.TiKV { tblPath = path break } @@ -568,7 +569,7 @@ func (p *LogicalJoin) buildIndexJoinInner2IndexScan( outerIdx int, us *LogicalUnionScan, avgInnerRowCnt float64) (joins []PhysicalPlan) { helper := &indexJoinBuildHelper{join: p} for _, path := range ds.possibleAccessPaths { - if path.isTablePath { + if path.IsTablePath { continue } emptyRange, err := helper.analyzeLookUpFilters(path, ds, innerJoinKeys) @@ -592,7 +593,7 @@ func (p *LogicalJoin) buildIndexJoinInner2IndexScan( } } joins = make([]PhysicalPlan, 0, 3) - rangeInfo := helper.buildRangeDecidedByInformation(helper.chosenPath.idxCols, outerJoinKeys) + rangeInfo := helper.buildRangeDecidedByInformation(helper.chosenPath.IdxCols, outerJoinKeys) innerTask := p.constructInnerIndexScanTask(ds, helper.chosenPath, helper.chosenRemained, outerJoinKeys, us, rangeInfo, false, false, avgInnerRowCnt) joins = append(joins, p.constructIndexJoin(prop, outerIdx, innerTask, helper.chosenRanges, keyOff2IdxOff, helper.chosenPath, helper.lastColManager)...) @@ -620,7 +621,7 @@ type indexJoinBuildHelper struct { idxOff2KeyOff []int lastColManager *ColWithCmpFuncManager chosenRanges []*ranger.Range - chosenPath *accessPath + chosenPath *util.AccessPath curPossibleUsedKeys []*expression.Column curNotUsedIndexCols []*expression.Column @@ -720,7 +721,7 @@ func (p *LogicalJoin) constructInnerUnionScan(us *LogicalUnionScan, reader Physi // constructInnerIndexScanTask is specially used to construct the inner plan for PhysicalIndexJoin. func (p *LogicalJoin) constructInnerIndexScanTask( ds *DataSource, - path *accessPath, + path *util.AccessPath, filterConds []expression.Expression, outerJoinKeys []*expression.Column, us *LogicalUnionScan, @@ -734,9 +735,9 @@ func (p *LogicalJoin) constructInnerIndexScanTask( TableAsName: ds.TableAsName, DBName: ds.DBName, Columns: ds.Columns, - Index: path.index, - IdxCols: path.idxCols, - IdxColLens: path.idxColLens, + Index: path.Index, + IdxCols: path.IdxCols, + IdxColLens: path.IdxColLens, dataSourceSchema: ds.schema, KeepOrder: keepOrder, Ranges: ranger.FullRange(), @@ -752,7 +753,7 @@ func (p *LogicalJoin) constructInnerIndexScanTask( tblCols: ds.TblCols, keepOrder: is.KeepOrder, } - if !isCoveringIndex(ds.schema.Columns, path.fullIdxCols, path.fullIdxColLens, is.Table.PKIsHandle) { + if !isCoveringIndex(ds.schema.Columns, path.FullIdxCols, path.FullIdxColLens, is.Table.PKIsHandle) { // On this way, it's double read case. ts := PhysicalTableScan{ Columns: ds.Columns, @@ -768,24 +769,24 @@ func (p *LogicalJoin) constructInnerIndexScanTask( } cop.tablePlan = ts } - is.initSchema(path.index, path.fullIdxCols, cop.tablePlan != nil) - rowSize := is.indexScanRowSize(path.index, ds, true) + is.initSchema(path.Index, path.FullIdxCols, cop.tablePlan != nil) + rowSize := is.indexScanRowSize(path.Index, ds, true) sessVars := ds.ctx.GetSessionVars() cop.cst = rowCount * rowSize * sessVars.ScanFactor - indexConds, tblConds := splitIndexFilterConditions(filterConds, path.fullIdxCols, path.fullIdxColLens, ds.tableInfo) - tmpPath := &accessPath{ - indexFilters: indexConds, - tableFilters: tblConds, - countAfterAccess: rowCount, + indexConds, tblConds := splitIndexFilterConditions(filterConds, path.FullIdxCols, path.FullIdxColLens, ds.tableInfo) + tmpPath := &util.AccessPath{ + IndexFilters: indexConds, + TableFilters: tblConds, + CountAfterAccess: rowCount, } // Assume equal conditions used by index join and other conditions are independent. if len(indexConds) > 0 { - selectivity, _, err := ds.tableStats.HistColl.Selectivity(ds.ctx, indexConds) + selectivity, _, err := ds.tableStats.HistColl.Selectivity(ds.ctx, indexConds, nil) if err != nil { logutil.BgLogger().Debug("calculate selectivity failed, use selection factor", zap.Error(err)) selectivity = selectionFactor } - tmpPath.countAfterIndex = rowCount * selectivity + tmpPath.CountAfterIndex = rowCount * selectivity } selectivity := ds.stats.RowCount / ds.tableStats.RowCount finalStats := ds.stats.ScaleByExpectCnt(selectivity * rowCount) @@ -987,15 +988,15 @@ func (ijHelper *indexJoinBuildHelper) removeUselessEqAndInFunc( return notKeyEqAndIn, nil } -func (ijHelper *indexJoinBuildHelper) analyzeLookUpFilters(path *accessPath, innerPlan *DataSource, innerJoinKeys []*expression.Column) (emptyRange bool, err error) { - if len(path.idxCols) == 0 { +func (ijHelper *indexJoinBuildHelper) analyzeLookUpFilters(path *util.AccessPath, innerPlan *DataSource, innerJoinKeys []*expression.Column) (emptyRange bool, err error) { + if len(path.IdxCols) == 0 { return false, nil } - accesses := make([]expression.Expression, 0, len(path.idxCols)) - ijHelper.resetContextForIndex(innerJoinKeys, path.idxCols, path.idxColLens) + accesses := make([]expression.Expression, 0, len(path.IdxCols)) + ijHelper.resetContextForIndex(innerJoinKeys, path.IdxCols, path.IdxColLens) notKeyEqAndIn, remained, rangeFilterCandidates := ijHelper.findUsefulEqAndInFilters(innerPlan) var remainedEqAndIn []expression.Expression - notKeyEqAndIn, remainedEqAndIn = ijHelper.removeUselessEqAndInFunc(path.idxCols, notKeyEqAndIn) + notKeyEqAndIn, remainedEqAndIn = ijHelper.removeUselessEqAndInFunc(path.IdxCols, notKeyEqAndIn) matchedKeyCnt := len(ijHelper.curPossibleUsedKeys) // If no join key is matched while join keys actually are not empty. We don't choose index join for now. if matchedKeyCnt <= 0 && len(innerJoinKeys) > 0 { @@ -1010,7 +1011,7 @@ func (ijHelper *indexJoinBuildHelper) analyzeLookUpFilters(path *accessPath, inn return false, nil } // If all the index columns are covered by eq/in conditions, we don't need to consider other conditions anymore. - if lastColPos == len(path.idxCols) { + if lastColPos == len(path.IdxCols) { // If there's join key matched index column. Then choose hash join is always a better idea. // e.g. select * from t1, t2 where t2.a=1 and t2.b=1. And t2 has index(a, b). // If we don't have the following check, TiDB will build index join for this case. @@ -1028,10 +1029,10 @@ func (ijHelper *indexJoinBuildHelper) analyzeLookUpFilters(path *accessPath, inn ijHelper.updateBestChoice(ranges, path, accesses, remained, nil) return false, nil } - lastPossibleCol := path.idxCols[lastColPos] + lastPossibleCol := path.IdxCols[lastColPos] lastColManager := &ColWithCmpFuncManager{ TargetCol: lastPossibleCol, - colLength: path.idxColLens[lastColPos], + colLength: path.IdxColLens[lastColPos], affectedColSchema: expression.NewSchema(), } lastColAccess := ijHelper.buildLastColManager(lastPossibleCol, innerPlan, lastColManager) @@ -1047,7 +1048,7 @@ func (ijHelper *indexJoinBuildHelper) analyzeLookUpFilters(path *accessPath, inn var ranges, nextColRange []*ranger.Range var err error if len(colAccesses) > 0 { - nextColRange, err = ranger.BuildColumnRange(colAccesses, ijHelper.join.ctx.GetSessionVars().StmtCtx, lastPossibleCol.RetType, path.idxColLens[lastColPos]) + nextColRange, err = ranger.BuildColumnRange(colAccesses, ijHelper.join.ctx.GetSessionVars().StmtCtx, lastPossibleCol.RetType, path.IdxColLens[lastColPos]) if err != nil { return false, err } @@ -1060,7 +1061,7 @@ func (ijHelper *indexJoinBuildHelper) analyzeLookUpFilters(path *accessPath, inn return true, nil } remained = append(remained, colRemained...) - if path.idxColLens[lastColPos] != types.UnspecifiedLength { + if path.IdxColLens[lastColPos] != types.UnspecifiedLength { remained = append(remained, colAccesses...) } accesses = append(accesses, colAccesses...) @@ -1080,7 +1081,7 @@ func (ijHelper *indexJoinBuildHelper) analyzeLookUpFilters(path *accessPath, inn return false, nil } -func (ijHelper *indexJoinBuildHelper) updateBestChoice(ranges []*ranger.Range, path *accessPath, accesses, +func (ijHelper *indexJoinBuildHelper) updateBestChoice(ranges []*ranger.Range, path *util.AccessPath, accesses, remained []expression.Expression, lastColManager *ColWithCmpFuncManager) { // We choose the index by the number of used columns of the range, the much the better. // Notice that there may be the cases like `t1.a=t2.a and b > 2 and b < 1`. So ranges can be nil though the conditions are valid. diff --git a/planner/core/exhaust_physical_plans_test.go b/planner/core/exhaust_physical_plans_test.go index 98fba2158aaa6..dcfe267628535 100644 --- a/planner/core/exhaust_physical_plans_test.go +++ b/planner/core/exhaust_physical_plans_test.go @@ -21,6 +21,7 @@ import ( "github.com/pingcap/parser/model" "github.com/pingcap/parser/mysql" "github.com/pingcap/tidb/expression" + "github.com/pingcap/tidb/planner/util" "github.com/pingcap/tidb/types" ) @@ -120,9 +121,9 @@ func (s *testUnitTestSuit) TestIndexJoinAnalyzeLookUpFilters(c *C) { DBName: model.NewCIStr("test"), }) joinNode.SetSchema(expression.MergeSchema(dsSchema, outerChildSchema)) - path := &accessPath{ - idxCols: append(make([]*expression.Column, 0, 4), dsSchema.Columns...), - idxColLens: []int{types.UnspecifiedLength, types.UnspecifiedLength, 2, types.UnspecifiedLength}, + path := &util.AccessPath{ + IdxCols: append(make([]*expression.Column, 0, 4), dsSchema.Columns...), + IdxColLens: []int{types.UnspecifiedLength, types.UnspecifiedLength, 2, types.UnspecifiedLength}, } joinColNames := append(dsNames.Shallow(), outerChildNames...) diff --git a/planner/core/find_best_task.go b/planner/core/find_best_task.go index e5633b82a1970..2aab18735460c 100644 --- a/planner/core/find_best_task.go +++ b/planner/core/find_best_task.go @@ -22,6 +22,7 @@ import ( "github.com/pingcap/tidb/expression" "github.com/pingcap/tidb/kv" "github.com/pingcap/tidb/planner/property" + "github.com/pingcap/tidb/planner/util" "github.com/pingcap/tidb/sessionctx/stmtctx" "github.com/pingcap/tidb/statistics" "github.com/pingcap/tidb/types" @@ -216,7 +217,7 @@ func (ds *DataSource) tryToGetDualTask() (task, error) { // candidatePath is used to maintain required info for skyline pruning. type candidatePath struct { - path *accessPath + path *util.AccessPath columnSet *intsets.Sparse // columnSet is the set of columns that occurred in the access conditions. isSingleScan bool isMatchProp bool @@ -275,41 +276,41 @@ func compareCandidates(lhs, rhs *candidatePath) int { return 0 } -func (ds *DataSource) getTableCandidate(path *accessPath, prop *property.PhysicalProperty) *candidatePath { +func (ds *DataSource) getTableCandidate(path *util.AccessPath, prop *property.PhysicalProperty) *candidatePath { candidate := &candidatePath{path: path} pkCol := ds.getPKIsHandleCol() if len(prop.Items) == 1 && pkCol != nil { candidate.isMatchProp = prop.Items[0].Col.Equal(nil, pkCol) - if path.storeType == kv.TiFlash { + if path.StoreType == kv.TiFlash { candidate.isMatchProp = candidate.isMatchProp && !prop.Items[0].Desc } } - candidate.columnSet = expression.ExtractColumnSet(path.accessConds) + candidate.columnSet = expression.ExtractColumnSet(path.AccessConds) candidate.isSingleScan = true return candidate } -func (ds *DataSource) getIndexCandidate(path *accessPath, prop *property.PhysicalProperty, isSingleScan bool) *candidatePath { +func (ds *DataSource) getIndexCandidate(path *util.AccessPath, prop *property.PhysicalProperty, isSingleScan bool) *candidatePath { candidate := &candidatePath{path: path} all, _ := prop.AllSameOrder() // When the prop is empty or `all` is false, `isMatchProp` is better to be `false` because // it needs not to keep order for index scan. if !prop.IsEmpty() && all { - for i, col := range path.idxCols { + for i, col := range path.IdxCols { if col.Equal(nil, prop.Items[0].Col) { - candidate.isMatchProp = matchIndicesProp(path.idxCols[i:], path.idxColLens[i:], prop.Items) + candidate.isMatchProp = matchIndicesProp(path.IdxCols[i:], path.IdxColLens[i:], prop.Items) break - } else if i >= path.eqCondCount { + } else if i >= path.EqCondCount { break } } } - candidate.columnSet = expression.ExtractColumnSet(path.accessConds) + candidate.columnSet = expression.ExtractColumnSet(path.AccessConds) candidate.isSingleScan = isSingleScan return candidate } -func (ds *DataSource) getIndexMergeCandidate(path *accessPath) *candidatePath { +func (ds *DataSource) getIndexMergeCandidate(path *util.AccessPath) *candidatePath { candidate := &candidatePath{path: path} return candidate } @@ -319,20 +320,20 @@ func (ds *DataSource) getIndexMergeCandidate(path *accessPath) *candidatePath { func (ds *DataSource) skylinePruning(prop *property.PhysicalProperty) []*candidatePath { candidates := make([]*candidatePath, 0, 4) for _, path := range ds.possibleAccessPaths { - if path.partialIndexPaths != nil { + if path.PartialIndexPaths != nil { candidates = append(candidates, ds.getIndexMergeCandidate(path)) continue } // if we already know the range of the scan is empty, just return a TableDual - if len(path.ranges) == 0 && !ds.ctx.GetSessionVars().StmtCtx.UseCache { + if len(path.Ranges) == 0 && !ds.ctx.GetSessionVars().StmtCtx.UseCache { return []*candidatePath{{path: path}} } var currentCandidate *candidatePath - if path.isTablePath { + if path.IsTablePath { currentCandidate = ds.getTableCandidate(path, prop) } else { - coveredByIdx := isCoveringIndex(ds.schema.Columns, path.fullIdxCols, path.fullIdxColLens, ds.tableInfo.PKIsHandle) - if len(path.accessConds) > 0 || !prop.IsEmpty() || path.forced || coveredByIdx { + coveredByIdx := isCoveringIndex(ds.schema.Columns, path.FullIdxCols, path.FullIdxColLens, ds.tableInfo.PKIsHandle) + if len(path.AccessConds) > 0 || !prop.IsEmpty() || path.Forced || coveredByIdx { // We will use index to generate physical plan if any of the following conditions is satisfied: // 1. This path's access cond is not nil. // 2. We have a non-empty prop to match. @@ -345,7 +346,7 @@ func (ds *DataSource) skylinePruning(prop *property.PhysicalProperty) []*candida } pruned := false for i := len(candidates) - 1; i >= 0; i-- { - if candidates[i].path.storeType == kv.TiFlash { + if candidates[i].path.StoreType == kv.TiFlash { continue } result := compareCandidates(candidates[i], currentCandidate) @@ -416,7 +417,7 @@ func (ds *DataSource) findBestTask(prop *property.PhysicalProperty) (t task, err for _, candidate := range candidates { path := candidate.path - if path.partialIndexPaths != nil { + if path.PartialIndexPaths != nil { idxMergeTask, err := ds.convertToIndexMergeScan(prop, candidate) if err != nil { return nil, err @@ -427,18 +428,18 @@ func (ds *DataSource) findBestTask(prop *property.PhysicalProperty) (t task, err continue } // if we already know the range of the scan is empty, just return a TableDual - if len(path.ranges) == 0 && !ds.ctx.GetSessionVars().StmtCtx.UseCache { + if len(path.Ranges) == 0 && !ds.ctx.GetSessionVars().StmtCtx.UseCache { dual := PhysicalTableDual{}.Init(ds.ctx, ds.stats, ds.blockOffset) dual.SetSchema(ds.schema) return &rootTask{ p: dual, }, nil } - if path.isTablePath { - if ds.preferStoreType&preferTiFlash != 0 && path.storeType == kv.TiKV { + if path.IsTablePath { + if ds.preferStoreType&preferTiFlash != 0 && path.StoreType == kv.TiKV { continue } - if ds.preferStoreType&preferTiKV != 0 && path.storeType == kv.TiFlash { + if ds.preferStoreType&preferTiKV != 0 && path.StoreType == kv.TiFlash { continue } tblTask, err := ds.convertToTableScan(prop, candidate) @@ -472,17 +473,17 @@ func (ds *DataSource) convertToIndexMergeScan(prop *property.PhysicalProperty, c } path := candidate.path var totalCost, totalRowCount float64 - scans := make([]PhysicalPlan, 0, len(path.partialIndexPaths)) + scans := make([]PhysicalPlan, 0, len(path.PartialIndexPaths)) cop := &copTask{ indexPlanFinished: true, tblColHists: ds.TblColHists, } allCovered := true - for _, partPath := range path.partialIndexPaths { + for _, partPath := range path.PartialIndexPaths { var scan PhysicalPlan var partialCost, rowCount float64 var tempCovered bool - if partPath.isTablePath { + if partPath.IsTablePath { scan, partialCost, rowCount, tempCovered = ds.convertToPartialTableScan(prop, partPath) } else { scan, partialCost, rowCount, tempCovered = ds.convertToPartialIndexScan(prop, partPath) @@ -493,8 +494,8 @@ func (ds *DataSource) convertToIndexMergeScan(prop *property.PhysicalProperty, c allCovered = allCovered && tempCovered } - if !allCovered || len(path.tableFilters) > 0 { - ts, partialCost := ds.buildIndexMergeTableScan(prop, path.tableFilters, totalRowCount) + if !allCovered || len(path.TableFilters) > 0 { + ts, partialCost := ds.buildIndexMergeTableScan(prop, path.TableFilters, totalRowCount) totalCost += partialCost cop.tablePlan = ts } @@ -504,22 +505,22 @@ func (ds *DataSource) convertToIndexMergeScan(prop *property.PhysicalProperty, c return task, nil } -func (ds *DataSource) convertToPartialIndexScan(prop *property.PhysicalProperty, path *accessPath) ( +func (ds *DataSource) convertToPartialIndexScan(prop *property.PhysicalProperty, path *util.AccessPath) ( indexPlan PhysicalPlan, partialCost float64, rowCount float64, isCovered bool) { - idx := path.index + idx := path.Index is, partialCost, rowCount := ds.getOriginalPhysicalIndexScan(prop, path, false, false) rowSize := is.indexScanRowSize(idx, ds, false) - isCovered = isCoveringIndex(ds.schema.Columns, path.fullIdxCols, path.fullIdxColLens, ds.tableInfo.PKIsHandle) - indexConds := path.indexFilters + isCovered = isCoveringIndex(ds.schema.Columns, path.FullIdxCols, path.FullIdxColLens, ds.tableInfo.PKIsHandle) + indexConds := path.IndexFilters sessVars := ds.ctx.GetSessionVars() if indexConds != nil { var selectivity float64 partialCost += rowCount * sessVars.CopCPUFactor - if path.countAfterAccess > 0 { - selectivity = path.countAfterIndex / path.countAfterAccess + if path.CountAfterAccess > 0 { + selectivity = path.CountAfterIndex / path.CountAfterAccess } rowCount = is.stats.RowCount * selectivity stats := &property.StatsInfo{RowCount: rowCount} @@ -537,7 +538,7 @@ func (ds *DataSource) convertToPartialIndexScan(prop *property.PhysicalProperty, return indexPlan, partialCost, rowCount, isCovered } -func (ds *DataSource) convertToPartialTableScan(prop *property.PhysicalProperty, path *accessPath) ( +func (ds *DataSource) convertToPartialTableScan(prop *property.PhysicalProperty, path *util.AccessPath) ( tablePlan PhysicalPlan, partialCost float64, rowCount float64, @@ -546,7 +547,7 @@ func (ds *DataSource) convertToPartialTableScan(prop *property.PhysicalProperty, rowSize := ds.TblColHists.GetAvgRowSize(ds.TblCols, false) sessVars := ds.ctx.GetSessionVars() if len(ts.filterCondition) > 0 { - selectivity, _, err := ds.tableStats.HistColl.Selectivity(ds.ctx, ts.filterCondition) + selectivity, _, err := ds.tableStats.HistColl.Selectivity(ds.ctx, ts.filterCondition, nil) if err != nil { logutil.BgLogger().Debug("calculate selectivity failed, use selection factor", zap.Error(err)) selectivity = selectionFactor @@ -589,7 +590,7 @@ func (ds *DataSource) buildIndexMergeTableScan(prop *property.PhysicalProperty, } if len(tableFilters) > 0 { partialCost += totalRowCount * sessVars.CopCPUFactor - selectivity, _, err := ds.tableStats.HistColl.Selectivity(ds.ctx, tableFilters) + selectivity, _, err := ds.tableStats.HistColl.Selectivity(ds.ctx, tableFilters, nil) if err != nil { logutil.BgLogger().Debug("calculate selectivity failed, use selection factor", zap.Error(err)) selectivity = selectionFactor @@ -749,9 +750,9 @@ func (is *PhysicalIndexScan) initSchema(idx *model.IndexInfo, idxExprCols []*exp is.SetSchema(expression.NewSchema(indexCols...)) } -func (is *PhysicalIndexScan) addPushedDownSelection(copTask *copTask, p *DataSource, path *accessPath, finalStats *property.StatsInfo) { +func (is *PhysicalIndexScan) addPushedDownSelection(copTask *copTask, p *DataSource, path *util.AccessPath, finalStats *property.StatsInfo) { // Add filter condition to table plan now. - indexConds, tableConds := path.indexFilters, path.tableFilters + indexConds, tableConds := path.IndexFilters, path.TableFilters tableConds, copTask.rootTaskConds = splitSelCondsWithVirtualColumn(tableConds) @@ -759,8 +760,8 @@ func (is *PhysicalIndexScan) addPushedDownSelection(copTask *copTask, p *DataSou if indexConds != nil { copTask.cst += copTask.count() * sessVars.CopCPUFactor var selectivity float64 - if path.countAfterAccess > 0 { - selectivity = path.countAfterIndex / path.countAfterAccess + if path.CountAfterAccess > 0 { + selectivity = path.CountAfterIndex / path.CountAfterAccess } count := is.stats.RowCount * selectivity stats := p.tableStats.ScaleByExpectCnt(count) @@ -905,19 +906,19 @@ func convertRangeFromExpectedCnt(ranges []*ranger.Range, rangeCounts []float64, return convertedRanges, count, false } -// crossEstimateRowCount estimates row count of table scan using histogram of another column which is in tableFilters +// crossEstimateRowCount estimates row count of table scan using histogram of another column which is in TableFilters // and has high order correlation with handle column. For example, if the query is like: // `select * from tbl where a = 1 order by pk limit 1` // if order of column `a` is strictly correlated with column `pk`, the row count of table scan should be: // `1 + row_count(a < 1 or a is null)` -func (ds *DataSource) crossEstimateRowCount(path *accessPath, expectedCnt float64, desc bool) (float64, bool, float64) { - if ds.statisticTable.Pseudo || len(path.tableFilters) == 0 { +func (ds *DataSource) crossEstimateRowCount(path *util.AccessPath, expectedCnt float64, desc bool) (float64, bool, float64) { + if ds.statisticTable.Pseudo || len(path.TableFilters) == 0 { return 0, false, 0 } - col, corr := getMostCorrColFromExprs(path.tableFilters, ds.statisticTable, ds.ctx.GetSessionVars().CorrelationThreshold) + col, corr := getMostCorrColFromExprs(path.TableFilters, ds.statisticTable, ds.ctx.GetSessionVars().CorrelationThreshold) // If table scan is not full range scan, we cannot use histogram of other columns for estimation, because // the histogram reflects value distribution in the whole table level. - if col == nil || len(path.accessConds) > 0 { + if col == nil || len(path.AccessConds) > 0 { return 0, false, corr } colInfoID := col.ID @@ -926,7 +927,7 @@ func (ds *DataSource) crossEstimateRowCount(path *accessPath, expectedCnt float6 if colHist.Correlation < 0 { desc = !desc } - accessConds, remained := ranger.DetachCondsForColumn(ds.ctx, path.tableFilters, col) + accessConds, remained := ranger.DetachCondsForColumn(ds.ctx, path.TableFilters, col) if len(accessConds) == 0 { return 0, false, corr } @@ -945,7 +946,7 @@ func (ds *DataSource) crossEstimateRowCount(path *accessPath, expectedCnt float6 } convertedRanges, count, isFull := convertRangeFromExpectedCnt(ranges, rangeCounts, expectedCnt, desc) if isFull { - return path.countAfterAccess, true, 0 + return path.CountAfterAccess, true, 0 } var rangeCount float64 if idxExists { @@ -960,7 +961,7 @@ func (ds *DataSource) crossEstimateRowCount(path *accessPath, expectedCnt float6 if len(remained) > 0 { scanCount = scanCount / selectionFactor } - scanCount = math.Min(scanCount, path.countAfterAccess) + scanCount = math.Min(scanCount, path.CountAfterAccess) return scanCount, true, 0 } @@ -1053,7 +1054,7 @@ func (ts *PhysicalTableScan) addPushedDownSelection(copTask *copTask, stats *pro } } -func (ds *DataSource) getOriginalPhysicalTableScan(prop *property.PhysicalProperty, path *accessPath, isMatchProp bool) (*PhysicalTableScan, float64, float64) { +func (ds *DataSource) getOriginalPhysicalTableScan(prop *property.PhysicalProperty, path *util.AccessPath, isMatchProp bool) (*PhysicalTableScan, float64, float64) { ts := PhysicalTableScan{ Table: ds.tableInfo, Columns: ds.Columns, @@ -1061,10 +1062,10 @@ func (ds *DataSource) getOriginalPhysicalTableScan(prop *property.PhysicalProper DBName: ds.DBName, isPartition: ds.isPartition, physicalTableID: ds.physicalTableID, - Ranges: path.ranges, - AccessCondition: path.accessConds, - filterCondition: path.tableFilters, - StoreType: path.storeType, + Ranges: path.Ranges, + AccessCondition: path.AccessConds, + filterCondition: path.TableFilters, + StoreType: path.StoreType, }.Init(ds.ctx, ds.blockOffset) if ts.StoreType == kv.TiFlash { // Append the AccessCondition to filterCondition because TiFlash only support full range scan for each @@ -1080,7 +1081,7 @@ func (ds *DataSource) getOriginalPhysicalTableScan(prop *property.PhysicalProper } } } - rowCount := path.countAfterAccess + rowCount := path.CountAfterAccess if prop.ExpectedCnt < ds.stats.RowCount { count, ok, corr := ds.crossEstimateRowCount(path, prop.ExpectedCnt, isMatchProp && prop.Items[0].Desc) if ok { @@ -1130,18 +1131,18 @@ func (ds *DataSource) getOriginalPhysicalTableScan(prop *property.PhysicalProper return ts, cost, rowCount } -func (ds *DataSource) getOriginalPhysicalIndexScan(prop *property.PhysicalProperty, path *accessPath, isMatchProp bool, isSingleScan bool) (*PhysicalIndexScan, float64, float64) { - idx := path.index +func (ds *DataSource) getOriginalPhysicalIndexScan(prop *property.PhysicalProperty, path *util.AccessPath, isMatchProp bool, isSingleScan bool) (*PhysicalIndexScan, float64, float64) { + idx := path.Index is := PhysicalIndexScan{ Table: ds.tableInfo, TableAsName: ds.TableAsName, DBName: ds.DBName, Columns: ds.Columns, Index: idx, - IdxCols: path.idxCols, - IdxColLens: path.idxColLens, - AccessCondition: path.accessConds, - Ranges: path.ranges, + IdxCols: path.IdxCols, + IdxColLens: path.IdxColLens, + AccessCondition: path.AccessConds, + Ranges: path.Ranges, dataSourceSchema: ds.schema, isPartition: ds.isPartition, physicalTableID: ds.physicalTableID, @@ -1150,13 +1151,13 @@ func (ds *DataSource) getOriginalPhysicalIndexScan(prop *property.PhysicalProper if statsTbl.Indices[idx.ID] != nil { is.Hist = &statsTbl.Indices[idx.ID].Histogram } - rowCount := path.countAfterAccess - is.initSchema(idx, path.fullIdxCols, !isSingleScan) + rowCount := path.CountAfterAccess + is.initSchema(idx, path.FullIdxCols, !isSingleScan) // Only use expectedCnt when it's smaller than the count we calculated. // e.g. IndexScan(count1)->After Filter(count2). The `ds.stats.RowCount` is count2. count1 is the one we need to calculate // If expectedCnt and count2 are both zero and we go into the below `if` block, the count1 will be set to zero though it's shouldn't be. if (isMatchProp || prop.IsEmpty()) && prop.ExpectedCnt < ds.stats.RowCount { - selectivity := ds.stats.RowCount / path.countAfterAccess + selectivity := ds.stats.RowCount / path.CountAfterAccess rowCount = math.Min(prop.ExpectedCnt/selectivity, rowCount) } is.stats = ds.tableStats.ScaleByExpectCnt(rowCount) diff --git a/planner/core/indexmerge_test.go b/planner/core/indexmerge_test.go index bdbc86001c58f..bbcc701a0754a 100644 --- a/planner/core/indexmerge_test.go +++ b/planner/core/indexmerge_test.go @@ -20,6 +20,7 @@ import ( "github.com/pingcap/parser" "github.com/pingcap/parser/model" "github.com/pingcap/tidb/infoschema" + "github.com/pingcap/tidb/planner/util" "github.com/pingcap/tidb/sessionctx" "github.com/pingcap/tidb/util/testleak" "github.com/pingcap/tidb/util/testutil" @@ -49,7 +50,7 @@ func (s *testIndexMergeSuite) TearDownSuite(c *C) { c.Assert(s.testdata.GenerateOutputIfNeeded(), IsNil) } -func getIndexMergePathDigest(paths []*accessPath, startIndex int) string { +func getIndexMergePathDigest(paths []*util.AccessPath, startIndex int) string { if len(paths) == startIndex { return "[]" } @@ -60,18 +61,18 @@ func getIndexMergePathDigest(paths []*accessPath, startIndex int) string { } path := paths[i] idxMergeDisgest += "{Idxs:[" - for j := 0; j < len(path.partialIndexPaths); j++ { + for j := 0; j < len(path.PartialIndexPaths); j++ { if j > 0 { idxMergeDisgest += "," } - idxMergeDisgest += path.partialIndexPaths[j].index.Name.L + idxMergeDisgest += path.PartialIndexPaths[j].Index.Name.L } idxMergeDisgest += "],TbFilters:[" - for j := 0; j < len(path.tableFilters); j++ { + for j := 0; j < len(path.TableFilters); j++ { if j > 0 { idxMergeDisgest += "," } - idxMergeDisgest += path.tableFilters[j].String() + idxMergeDisgest += path.TableFilters[j].String() } idxMergeDisgest += "]}" } diff --git a/planner/core/logical_plan_builder.go b/planner/core/logical_plan_builder.go index 8a2a8f31c11fb..15d8f29567d47 100644 --- a/planner/core/logical_plan_builder.go +++ b/planner/core/logical_plan_builder.go @@ -38,6 +38,7 @@ import ( "github.com/pingcap/tidb/kv" "github.com/pingcap/tidb/metrics" "github.com/pingcap/tidb/planner/property" + "github.com/pingcap/tidb/planner/util" "github.com/pingcap/tidb/privilege" "github.com/pingcap/tidb/sessionctx" "github.com/pingcap/tidb/statistics" @@ -452,7 +453,7 @@ func (ds *DataSource) setPreferredStoreType(hintInfo *tableHintInfo) { ds.preferStoreType |= preferTiFlash hasTiFlashPath := false for _, path := range ds.possibleAccessPaths { - if path.storeType == kv.TiFlash { + if path.StoreType == kv.TiFlash { hasTiFlashPath = true break } @@ -460,7 +461,7 @@ func (ds *DataSource) setPreferredStoreType(hintInfo *tableHintInfo) { // TODO: For now, if there is a TiFlash hint for a table, we enforce a TiFlash path. But hint is just a suggestion // for the planner. We can keep it since we need it to debug with PD and TiFlash. In future, this should be removed. if !hasTiFlashPath { - ds.possibleAccessPaths = append(ds.possibleAccessPaths, &accessPath{isTablePath: true, storeType: kv.TiFlash}) + ds.possibleAccessPaths = append(ds.possibleAccessPaths, &util.AccessPath{IsTablePath: true, StoreType: kv.TiFlash}) } } } @@ -2631,10 +2632,10 @@ func (b *PlanBuilder) buildDataSource(ctx context.Context, tn *ast.TableName, as ds.names = names ds.setPreferredStoreType(b.TableHints()) - // Init fullIdxCols, fullIdxColLens for accessPaths. + // Init FullIdxCols, FullIdxColLens for accessPaths. for _, path := range ds.possibleAccessPaths { - if !path.isTablePath { - path.fullIdxCols, path.fullIdxColLens = expression.IndexInfo2Cols(ds.Columns, ds.schema.Columns, path.index) + if !path.IsTablePath { + path.FullIdxCols, path.FullIdxColLens = expression.IndexInfo2Cols(ds.Columns, ds.schema.Columns, path.Index) } } diff --git a/planner/core/logical_plan_test.go b/planner/core/logical_plan_test.go index 79a1dd565c86f..8c8f139ae323b 100644 --- a/planner/core/logical_plan_test.go +++ b/planner/core/logical_plan_test.go @@ -1349,10 +1349,10 @@ func byItemsToProperty(byItems []*ByItems) *property.PhysicalProperty { func pathsName(paths []*candidatePath) string { var names []string for _, path := range paths { - if path.path.isTablePath { + if path.path.IsTablePath { names = append(names, "PRIMARY_KEY") } else { - names = append(names, path.path.index.Name.O) + names = append(names, path.path.Index.Name.O) } } return strings.Join(names, ",") diff --git a/planner/core/logical_plans.go b/planner/core/logical_plans.go index 265713839186f..5bfa6614b4638 100644 --- a/planner/core/logical_plans.go +++ b/planner/core/logical_plans.go @@ -22,8 +22,8 @@ import ( "github.com/pingcap/parser/mysql" "github.com/pingcap/tidb/expression" "github.com/pingcap/tidb/expression/aggregation" - "github.com/pingcap/tidb/kv" "github.com/pingcap/tidb/planner/property" + "github.com/pingcap/tidb/planner/util" "github.com/pingcap/tidb/statistics" "github.com/pingcap/tidb/table" "github.com/pingcap/tidb/types" @@ -408,7 +408,7 @@ type DataSource struct { tableStats *property.StatsInfo // possibleAccessPaths stores all the possible access path for physical plan, including table scan. - possibleAccessPaths []*accessPath + possibleAccessPaths []*util.AccessPath // The data source may be a partition, rather than a real table. isPartition bool @@ -486,37 +486,10 @@ func (p *LogicalIndexScan) MatchIndexProp(prop *property.PhysicalProperty) (matc return false } -// accessPath indicates the way we access a table: by using single index, or by using multiple indexes, -// or just by using table scan. -type accessPath struct { - index *model.IndexInfo - fullIdxCols []*expression.Column - fullIdxColLens []int - idxCols []*expression.Column - idxColLens []int - ranges []*ranger.Range - // countAfterAccess is the row count after we apply range seek and before we use other filter to filter data. - countAfterAccess float64 - // countAfterIndex is the row count after we apply filters on index and before we apply the table filters. - countAfterIndex float64 - accessConds []expression.Expression - eqCondCount int - indexFilters []expression.Expression - tableFilters []expression.Expression - // isTablePath indicates whether this path is table path. - isTablePath bool - storeType kv.StoreType - // forced means this path is generated by `use/force index()`. - forced bool - // partialIndexPaths store all index access paths. - // If there are extra filters, store them in tableFilters. - partialIndexPaths []*accessPath -} - // getTablePath finds the TablePath from a group of accessPaths. -func getTablePath(paths []*accessPath) *accessPath { +func getTablePath(paths []*util.AccessPath) *util.AccessPath { for _, path := range paths { - if path.isTablePath { + if path.IsTablePath { return path } } @@ -532,11 +505,11 @@ func (ds *DataSource) buildTableGather() LogicalPlan { return sg } -func (ds *DataSource) buildIndexGather(path *accessPath) LogicalPlan { +func (ds *DataSource) buildIndexGather(path *util.AccessPath) LogicalPlan { is := LogicalIndexScan{ Source: ds, IsDoubleRead: false, - Index: path.index, + Index: path.Index, }.Init(ds.ctx, ds.blockOffset) is.Columns = make([]*model.ColumnInfo, len(ds.Columns)) @@ -546,7 +519,7 @@ func (ds *DataSource) buildIndexGather(path *accessPath) LogicalPlan { sg := TiKVSingleGather{ Source: ds, IsIndexGather: true, - Index: path.index, + Index: path.Index, }.Init(ds.ctx, ds.blockOffset) sg.SetSchema(ds.Schema()) sg.SetChildren(is) @@ -558,10 +531,10 @@ func (ds *DataSource) Convert2Gathers() (gathers []LogicalPlan) { tg := ds.buildTableGather() gathers = append(gathers, tg) for _, path := range ds.possibleAccessPaths { - if !path.isTablePath { - path.fullIdxCols, path.fullIdxColLens = expression.IndexInfo2Cols(ds.Columns, ds.schema.Columns, path.index) + if !path.IsTablePath { + path.FullIdxCols, path.FullIdxColLens = expression.IndexInfo2Cols(ds.Columns, ds.schema.Columns, path.Index) // If index columns can cover all of the needed columns, we can use a IndexGather + IndexScan. - if isCoveringIndex(ds.schema.Columns, path.fullIdxCols, path.fullIdxColLens, ds.tableInfo.PKIsHandle) { + if isCoveringIndex(ds.schema.Columns, path.FullIdxCols, path.FullIdxColLens, ds.tableInfo.PKIsHandle) { gathers = append(gathers, ds.buildIndexGather(path)) } // TODO: If index columns can not cover the schema, use IndexLookUpGather. @@ -570,14 +543,14 @@ func (ds *DataSource) Convert2Gathers() (gathers []LogicalPlan) { return gathers } -// deriveTablePathStats will fulfill the information that the accessPath need. +// deriveTablePathStats will fulfill the information that the AccessPath need. // And it will check whether the primary key is covered only by point query. // isIm indicates whether this function is called to generate the partial path for IndexMerge. -func (ds *DataSource) deriveTablePathStats(path *accessPath, conds []expression.Expression, isIm bool) (bool, error) { +func (ds *DataSource) deriveTablePathStats(path *util.AccessPath, conds []expression.Expression, isIm bool) (bool, error) { var err error sc := ds.ctx.GetSessionVars().StmtCtx - path.countAfterAccess = float64(ds.statisticTable.Count) - path.tableFilters = conds + path.CountAfterAccess = float64(ds.statisticTable.Count) + path.TableFilters = conds var pkCol *expression.Column columnLen := len(ds.schema.Columns) isUnsigned := false @@ -590,20 +563,20 @@ func (ds *DataSource) deriveTablePathStats(path *accessPath, conds []expression. pkCol = ds.schema.Columns[columnLen-1] } if pkCol == nil { - path.ranges = ranger.FullIntRange(isUnsigned) + path.Ranges = ranger.FullIntRange(isUnsigned) return false, nil } - path.ranges = ranger.FullIntRange(isUnsigned) + path.Ranges = ranger.FullIntRange(isUnsigned) if len(conds) == 0 { return false, nil } - path.accessConds, path.tableFilters = ranger.DetachCondsForColumn(ds.ctx, conds, pkCol) + path.AccessConds, path.TableFilters = ranger.DetachCondsForColumn(ds.ctx, conds, pkCol) // If there's no access cond, we try to find that whether there's expression containing correlated column that // can be used to access data. corColInAccessConds := false - if len(path.accessConds) == 0 { - for i, filter := range path.tableFilters { + if len(path.AccessConds) == 0 { + for i, filter := range path.TableFilters { eqFunc, ok := filter.(*expression.ScalarFunction) if !ok || eqFunc.FuncName.L != ast.EQ { continue @@ -612,8 +585,8 @@ func (ds *DataSource) deriveTablePathStats(path *accessPath, conds []expression. if lOk && lCol.Equal(ds.ctx, pkCol) { _, rOk := eqFunc.GetArgs()[1].(*expression.CorrelatedColumn) if rOk { - path.accessConds = append(path.accessConds, filter) - path.tableFilters = append(path.tableFilters[:i], path.tableFilters[i+1:]...) + path.AccessConds = append(path.AccessConds, filter) + path.TableFilters = append(path.TableFilters[:i], path.TableFilters[i+1:]...) corColInAccessConds = true break } @@ -622,8 +595,8 @@ func (ds *DataSource) deriveTablePathStats(path *accessPath, conds []expression. if rOk && rCol.Equal(ds.ctx, pkCol) { _, lOk := eqFunc.GetArgs()[0].(*expression.CorrelatedColumn) if lOk { - path.accessConds = append(path.accessConds, filter) - path.tableFilters = append(path.tableFilters[:i], path.tableFilters[i+1:]...) + path.AccessConds = append(path.AccessConds, filter) + path.TableFilters = append(path.TableFilters[:i], path.TableFilters[i+1:]...) corColInAccessConds = true break } @@ -631,22 +604,22 @@ func (ds *DataSource) deriveTablePathStats(path *accessPath, conds []expression. } } if corColInAccessConds { - path.countAfterAccess = 1 + path.CountAfterAccess = 1 return true, nil } - path.ranges, err = ranger.BuildTableRange(path.accessConds, sc, pkCol.RetType) + path.Ranges, err = ranger.BuildTableRange(path.AccessConds, sc, pkCol.RetType) if err != nil { return false, err } - path.countAfterAccess, err = ds.statisticTable.GetRowCountByIntColumnRanges(sc, pkCol.ID, path.ranges) - // If the `countAfterAccess` is less than `stats.RowCount`, there must be some inconsistent stats info. + path.CountAfterAccess, err = ds.statisticTable.GetRowCountByIntColumnRanges(sc, pkCol.ID, path.Ranges) + // If the `CountAfterAccess` is less than `stats.RowCount`, there must be some inconsistent stats info. // We prefer the `stats.RowCount` because it could use more stats info to calculate the selectivity. - if path.countAfterAccess < ds.stats.RowCount && !isIm { - path.countAfterAccess = math.Min(ds.stats.RowCount/selectionFactor, float64(ds.statisticTable.Count)) + if path.CountAfterAccess < ds.stats.RowCount && !isIm { + path.CountAfterAccess = math.Min(ds.stats.RowCount/selectionFactor, float64(ds.statisticTable.Count)) } // Check whether the primary key is covered by point query. noIntervalRange := true - for _, ran := range path.ranges { + for _, ran := range path.Ranges { if !ran.IsPoint(sc) { noIntervalRange = false break @@ -655,90 +628,95 @@ func (ds *DataSource) deriveTablePathStats(path *accessPath, conds []expression. return noIntervalRange, err } -// deriveIndexPathStats will fulfill the information that the accessPath need. -// And it will check whether this index is full matched by point query. We will use this check to -// determine whether we remove other paths or not. -// conds is the conditions used to generate the DetachRangeResult for path. -// isIm indicates whether this function is called to generate the partial path for IndexMerge. -func (ds *DataSource) deriveIndexPathStats(path *accessPath, conds []expression.Expression, isIm bool) (bool, error) { +func (ds *DataSource) fillIndexPath(path *util.AccessPath, conds []expression.Expression) error { sc := ds.ctx.GetSessionVars().StmtCtx - path.ranges = ranger.FullRange() - path.countAfterAccess = float64(ds.statisticTable.Count) - path.idxCols, path.idxColLens = expression.IndexInfo2PrefixCols(ds.Columns, ds.schema.Columns, path.index) - path.fullIdxCols, path.fullIdxColLens = expression.IndexInfo2Cols(ds.Columns, ds.schema.Columns, path.index) - if !path.index.Unique && !path.index.Primary && len(path.index.Columns) == len(path.idxCols) { + path.Ranges = ranger.FullRange() + path.CountAfterAccess = float64(ds.statisticTable.Count) + path.IdxCols, path.IdxColLens = expression.IndexInfo2PrefixCols(ds.Columns, ds.schema.Columns, path.Index) + path.FullIdxCols, path.FullIdxColLens = expression.IndexInfo2Cols(ds.Columns, ds.schema.Columns, path.Index) + if !path.Index.Unique && !path.Index.Primary && len(path.Index.Columns) == len(path.IdxCols) { handleCol := ds.getPKIsHandleCol() if handleCol != nil && !mysql.HasUnsignedFlag(handleCol.RetType.Flag) { - path.idxCols = append(path.idxCols, handleCol) - path.idxColLens = append(path.idxColLens, types.UnspecifiedLength) + path.IdxCols = append(path.IdxCols, handleCol) + path.IdxColLens = append(path.IdxColLens, types.UnspecifiedLength) } } - eqOrInCount := 0 - if len(path.idxCols) != 0 { - res, err := ranger.DetachCondAndBuildRangeForIndex(ds.ctx, conds, path.idxCols, path.idxColLens) + if len(path.IdxCols) != 0 { + res, err := ranger.DetachCondAndBuildRangeForIndex(ds.ctx, conds, path.IdxCols, path.IdxColLens) if err != nil { - return false, err - } - path.ranges = res.Ranges - path.accessConds = res.AccessConds - path.tableFilters = res.RemainedConds - path.eqCondCount = res.EqCondCount - eqOrInCount = res.EqOrInCount - path.countAfterAccess, err = ds.tableStats.HistColl.GetRowCountByIndexRanges(sc, path.index.ID, path.ranges) + return err + } + path.Ranges = res.Ranges + path.AccessConds = res.AccessConds + path.TableFilters = res.RemainedConds + path.EqCondCount = res.EqCondCount + path.EqOrInCondCount = res.EqOrInCount + path.IsDNFCond = res.IsDNFCond + path.CountAfterAccess, err = ds.tableStats.HistColl.GetRowCountByIndexRanges(sc, path.Index.ID, path.Ranges) if err != nil { - return false, err + return err } } else { - path.tableFilters = conds + path.TableFilters = conds } - if eqOrInCount == len(path.accessConds) { - accesses, remained := path.splitCorColAccessCondFromFilters(eqOrInCount) - path.accessConds = append(path.accessConds, accesses...) - path.tableFilters = remained + return nil +} + +// deriveIndexPathStats will fulfill the information that the AccessPath need. +// And it will check whether this index is full matched by point query. We will use this check to +// determine whether we remove other paths or not. +// conds is the conditions used to generate the DetachRangeResult for path. +// isIm indicates whether this function is called to generate the partial path for IndexMerge. +func (ds *DataSource) deriveIndexPathStats(path *util.AccessPath, conds []expression.Expression, isIm bool) bool { + sc := ds.ctx.GetSessionVars().StmtCtx + if path.EqOrInCondCount == len(path.AccessConds) { + accesses, remained := path.SplitCorColAccessCondFromFilters(path.EqOrInCondCount) + path.AccessConds = append(path.AccessConds, accesses...) + path.TableFilters = remained if len(accesses) > 0 && ds.statisticTable.Pseudo { - path.countAfterAccess = ds.statisticTable.PseudoAvgCountPerValue() + path.CountAfterAccess = ds.statisticTable.PseudoAvgCountPerValue() } else { - selectivity := path.countAfterAccess / float64(ds.statisticTable.Count) + selectivity := path.CountAfterAccess / float64(ds.statisticTable.Count) for i := range accesses { - col := path.idxCols[eqOrInCount+i] + col := path.IdxCols[path.EqOrInCondCount+i] ndv := ds.getColumnNDV(col.ID) ndv *= selectivity if ndv < 1 { ndv = 1.0 } - path.countAfterAccess = path.countAfterAccess / ndv + path.CountAfterAccess = path.CountAfterAccess / ndv } } } - path.indexFilters, path.tableFilters = splitIndexFilterConditions(path.tableFilters, path.fullIdxCols, path.fullIdxColLens, ds.tableInfo) - // If the `countAfterAccess` is less than `stats.RowCount`, there must be some inconsistent stats info. + path.IndexFilters, path.TableFilters = splitIndexFilterConditions(path.TableFilters, path.FullIdxCols, path.FullIdxColLens, ds.tableInfo) + // If the `CountAfterAccess` is less than `stats.RowCount`, there must be some inconsistent stats info. // We prefer the `stats.RowCount` because it could use more stats info to calculate the selectivity. - if path.countAfterAccess < ds.stats.RowCount && !isIm { - path.countAfterAccess = math.Min(ds.stats.RowCount/selectionFactor, float64(ds.statisticTable.Count)) + if path.CountAfterAccess < ds.stats.RowCount && !isIm { + path.CountAfterAccess = math.Min(ds.stats.RowCount/selectionFactor, float64(ds.statisticTable.Count)) } - if path.indexFilters != nil { - selectivity, _, err := ds.tableStats.HistColl.Selectivity(ds.ctx, path.indexFilters) + if path.IndexFilters != nil { + selectivity, _, err := ds.tableStats.HistColl.Selectivity(ds.ctx, path.IndexFilters, nil) if err != nil { logutil.BgLogger().Debug("calculate selectivity failed, use selection factor", zap.Error(err)) selectivity = selectionFactor } if isIm { - path.countAfterIndex = path.countAfterAccess * selectivity + path.CountAfterIndex = path.CountAfterAccess * selectivity } else { - path.countAfterIndex = math.Max(path.countAfterAccess*selectivity, ds.stats.RowCount) + path.CountAfterIndex = math.Max(path.CountAfterAccess*selectivity, ds.stats.RowCount) } } // Check whether there's only point query. noIntervalRanges := true haveNullVal := false - for _, ran := range path.ranges { + for _, ran := range path.Ranges { // Not point or the not full matched. - if !ran.IsPoint(sc) || len(ran.HighVal) != len(path.index.Columns) { + if !ran.IsPoint(sc) || len(ran.HighVal) != len(path.Index.Columns) { noIntervalRanges = false break } // Check whether there's null value. - for i := 0; i < len(path.index.Columns); i++ { + for i := 0; i < len(path.Index.Columns); i++ { if ran.HighVal[i].IsNull() { haveNullVal = true break @@ -748,70 +726,7 @@ func (ds *DataSource) deriveIndexPathStats(path *accessPath, conds []expression. break } } - return noIntervalRanges && !haveNullVal, nil -} - -func (path *accessPath) splitCorColAccessCondFromFilters(eqOrInCount int) (access, remained []expression.Expression) { - access = make([]expression.Expression, len(path.idxCols)-eqOrInCount) - used := make([]bool, len(path.tableFilters)) - for i := eqOrInCount; i < len(path.idxCols); i++ { - matched := false - for j, filter := range path.tableFilters { - if used[j] || !isColEqCorColOrConstant(filter, path.idxCols[i]) { - continue - } - matched = true - access[i-eqOrInCount] = filter - if path.idxColLens[i] == types.UnspecifiedLength { - used[j] = true - } - break - } - if !matched { - access = access[:i-eqOrInCount] - break - } - } - for i, ok := range used { - if !ok { - remained = append(remained, path.tableFilters[i]) - } - } - return access, remained -} - -// getEqOrInColOffset checks if the expression is a eq function that one side is constant or correlated column -// and another is column. -func isColEqCorColOrConstant(filter expression.Expression, col *expression.Column) bool { - f, ok := filter.(*expression.ScalarFunction) - if !ok || f.FuncName.L != ast.EQ { - return false - } - if c, ok := f.GetArgs()[0].(*expression.Column); ok { - if _, ok := f.GetArgs()[1].(*expression.Constant); ok { - if col.Equal(nil, c) { - return true - } - } - if _, ok := f.GetArgs()[1].(*expression.CorrelatedColumn); ok { - if col.Equal(nil, c) { - return true - } - } - } - if c, ok := f.GetArgs()[1].(*expression.Column); ok { - if _, ok := f.GetArgs()[0].(*expression.Constant); ok { - if col.Equal(nil, c) { - return true - } - } - if _, ok := f.GetArgs()[0].(*expression.CorrelatedColumn); ok { - if col.Equal(nil, c) { - return true - } - } - } - return false + return noIntervalRanges && !haveNullVal } func getPKIsHandleColFromSchema(cols []*model.ColumnInfo, schema *expression.Schema, pkIsHandle bool) *expression.Column { diff --git a/planner/core/logical_plans_test.go b/planner/core/logical_plans_test.go index aa225a3f0b732..c083566121eef 100644 --- a/planner/core/logical_plans_test.go +++ b/planner/core/logical_plans_test.go @@ -22,6 +22,7 @@ import ( "github.com/pingcap/parser/model" "github.com/pingcap/parser/mysql" "github.com/pingcap/tidb/expression" + "github.com/pingcap/tidb/planner/util" "github.com/pingcap/tidb/sessionctx" "github.com/pingcap/tidb/types" "github.com/pingcap/tidb/util/testleak" @@ -183,14 +184,14 @@ func (s *testUnitTestSuit) TestIndexPathSplitCorColCond(c *C) { c.Assert(err, IsNil, comment) trueFilters = append(trueFilters, trueFilter) } - path := accessPath{ - eqCondCount: 0, - tableFilters: trueFilters, - idxCols: expression.FindPrefixOfIndex(totalSchema.Columns, tt.idxColIDs), - idxColLens: tt.idxColLens, + path := util.AccessPath{ + EqCondCount: 0, + TableFilters: trueFilters, + IdxCols: expression.FindPrefixOfIndex(totalSchema.Columns, tt.idxColIDs), + IdxColLens: tt.idxColLens, } - access, remained := path.splitCorColAccessCondFromFilters(path.eqCondCount) + access, remained := path.SplitCorColAccessCondFromFilters(path.EqCondCount) c.Assert(fmt.Sprintf("%s", access), Equals, tt.access, comment) c.Assert(fmt.Sprintf("%s", remained), Equals, tt.remained, comment) } diff --git a/planner/core/planbuilder.go b/planner/core/planbuilder.go index 7e1bbc5bbffa4..650361cd67ffe 100644 --- a/planner/core/planbuilder.go +++ b/planner/core/planbuilder.go @@ -33,6 +33,7 @@ import ( "github.com/pingcap/tidb/infoschema" "github.com/pingcap/tidb/kv" "github.com/pingcap/tidb/planner/property" + "github.com/pingcap/tidb/planner/util" "github.com/pingcap/tidb/sessionctx" "github.com/pingcap/tidb/sessionctx/stmtctx" "github.com/pingcap/tidb/sessionctx/variable" @@ -584,14 +585,14 @@ func (b *PlanBuilder) detectSelectWindow(sel *ast.SelectStmt) bool { return false } -func getPathByIndexName(paths []*accessPath, idxName model.CIStr, tblInfo *model.TableInfo) *accessPath { - var tablePath *accessPath +func getPathByIndexName(paths []*util.AccessPath, idxName model.CIStr, tblInfo *model.TableInfo) *util.AccessPath { + var tablePath *util.AccessPath for _, path := range paths { - if path.isTablePath { + if path.IsTablePath { tablePath = path continue } - if path.index.Name.L == idxName.L { + if path.Index.Name.L == idxName.L { return path } } @@ -605,26 +606,26 @@ func isPrimaryIndex(indexName model.CIStr) bool { return indexName.L == "primary" } -func (b *PlanBuilder) getPossibleAccessPaths(indexHints []*ast.IndexHint, tbl table.Table, dbName, tblName model.CIStr) ([]*accessPath, error) { +func (b *PlanBuilder) getPossibleAccessPaths(indexHints []*ast.IndexHint, tbl table.Table, dbName, tblName model.CIStr) ([]*util.AccessPath, error) { tblInfo := tbl.Meta() - publicPaths := make([]*accessPath, 0, len(tblInfo.Indices)+2) + publicPaths := make([]*util.AccessPath, 0, len(tblInfo.Indices)+2) tp := kv.TiKV if tbl.Type().IsClusterTable() { tp = kv.TiDB } - publicPaths = append(publicPaths, &accessPath{isTablePath: true, storeType: tp}) + publicPaths = append(publicPaths, &util.AccessPath{IsTablePath: true, StoreType: tp}) if tblInfo.TiFlashReplica != nil && tblInfo.TiFlashReplica.Available { - publicPaths = append(publicPaths, &accessPath{isTablePath: true, storeType: kv.TiFlash}) + publicPaths = append(publicPaths, &util.AccessPath{IsTablePath: true, StoreType: kv.TiFlash}) } for _, index := range tblInfo.Indices { if index.State == model.StatePublic { - publicPaths = append(publicPaths, &accessPath{index: index}) + publicPaths = append(publicPaths, &util.AccessPath{Index: index}) } } hasScanHint, hasUseOrForce := false, false - available := make([]*accessPath, 0, len(publicPaths)) - ignored := make([]*accessPath, 0, len(publicPaths)) + available := make([]*util.AccessPath, 0, len(publicPaths)) + ignored := make([]*util.AccessPath, 0, len(publicPaths)) // Extract comment-style index hint like /*+ INDEX(t, idx1, idx2) */. indexHintsLen := len(indexHints) @@ -649,7 +650,7 @@ func (b *PlanBuilder) getPossibleAccessPaths(indexHints []*ast.IndexHint, tbl ta if hint.IndexNames == nil && hint.HintType != ast.HintIgnore { if path := getTablePath(publicPaths); path != nil { hasUseOrForce = true - path.forced = true + path.Forced = true available = append(available, path) } } @@ -672,7 +673,7 @@ func (b *PlanBuilder) getPossibleAccessPaths(indexHints []*ast.IndexHint, tbl ta // Currently we don't distinguish between "FORCE" and "USE" because // our cost estimation is not reliable. hasUseOrForce = true - path.forced = true + path.Forced = true available = append(available, path) } } @@ -686,25 +687,25 @@ func (b *PlanBuilder) getPossibleAccessPaths(indexHints []*ast.IndexHint, tbl ta // If we have got "FORCE" or "USE" index hint but got no available index, // we have to use table scan. if len(available) == 0 { - available = append(available, &accessPath{isTablePath: true}) + available = append(available, &util.AccessPath{IsTablePath: true}) } return available, nil } -func (b *PlanBuilder) filterPathByIsolationRead(paths []*accessPath) ([]*accessPath, error) { +func (b *PlanBuilder) filterPathByIsolationRead(paths []*util.AccessPath) ([]*util.AccessPath, error) { // TODO: filter paths with isolation read locations. isolationReadEngines := b.ctx.GetSessionVars().GetIsolationReadEngines() availableEngine := map[kv.StoreType]struct{}{} var availableEngineStr string for i := len(paths) - 1; i >= 0; i-- { - if _, ok := availableEngine[paths[i].storeType]; !ok { - availableEngine[paths[i].storeType] = struct{}{} + if _, ok := availableEngine[paths[i].StoreType]; !ok { + availableEngine[paths[i].StoreType] = struct{}{} if availableEngineStr != "" { availableEngineStr += ", " } - availableEngineStr += paths[i].storeType.Name() + availableEngineStr += paths[i].StoreType.Name() } - if _, ok := isolationReadEngines[paths[i].storeType]; !ok { + if _, ok := isolationReadEngines[paths[i].StoreType]; !ok { paths = append(paths[:i], paths[i+1:]...) } } @@ -717,13 +718,13 @@ func (b *PlanBuilder) filterPathByIsolationRead(paths []*accessPath) ([]*accessP return paths, err } -func removeIgnoredPaths(paths, ignoredPaths []*accessPath, tblInfo *model.TableInfo) []*accessPath { +func removeIgnoredPaths(paths, ignoredPaths []*util.AccessPath, tblInfo *model.TableInfo) []*util.AccessPath { if len(ignoredPaths) == 0 { return paths } - remainedPaths := make([]*accessPath, 0, len(paths)) + remainedPaths := make([]*util.AccessPath, 0, len(paths)) for _, path := range paths { - if path.isTablePath || getPathByIndexName(ignoredPaths, path.index.Name, tblInfo) == nil { + if path.IsTablePath || getPathByIndexName(ignoredPaths, path.Index.Name, tblInfo) == nil { remainedPaths = append(remainedPaths, path) } } diff --git a/planner/core/planbuilder_test.go b/planner/core/planbuilder_test.go index 3d5c35949696d..0908ff88dde69 100644 --- a/planner/core/planbuilder_test.go +++ b/planner/core/planbuilder_test.go @@ -21,6 +21,7 @@ import ( "github.com/pingcap/parser/ast" "github.com/pingcap/parser/model" "github.com/pingcap/tidb/expression" + "github.com/pingcap/tidb/planner/util" "github.com/pingcap/tidb/types" ) @@ -71,9 +72,9 @@ func (s *testPlanBuilderSuite) TestGetPathByIndexName(c *C) { PKIsHandle: true, } - accessPath := []*accessPath{ - {isTablePath: true}, - {index: &model.IndexInfo{Name: model.NewCIStr("idx")}}, + accessPath := []*util.AccessPath{ + {IsTablePath: true}, + {Index: &model.IndexInfo{Name: model.NewCIStr("idx")}}, } path := getPathByIndexName(accessPath, model.NewCIStr("idx"), tblInfo) diff --git a/planner/core/property_cols_prune.go b/planner/core/property_cols_prune.go index ebf523d572484..b45e761785b4b 100644 --- a/planner/core/property_cols_prune.go +++ b/planner/core/property_cols_prune.go @@ -21,7 +21,7 @@ func (ds *DataSource) preparePossibleProperties() [][]*expression.Column { result := make([][]*expression.Column, 0, len(ds.possibleAccessPaths)) for _, path := range ds.possibleAccessPaths { - if path.isTablePath { + if path.IsTablePath { col := ds.getPKIsHandleCol() if col != nil { result = append(result, []*expression.Column{col}) @@ -29,14 +29,14 @@ func (ds *DataSource) preparePossibleProperties() [][]*expression.Column { continue } - if len(path.idxCols) == 0 { + if len(path.IdxCols) == 0 { continue } - result = append(result, make([]*expression.Column, len(path.idxCols))) - copy(result[len(result)-1], path.idxCols) - for i := 0; i < path.eqCondCount && i+1 < len(path.idxCols); i++ { - result = append(result, make([]*expression.Column, len(path.idxCols)-i-1)) - copy(result[len(result)-1], path.idxCols[i+1:]) + result = append(result, make([]*expression.Column, len(path.IdxCols))) + copy(result[len(result)-1], path.IdxCols) + for i := 0; i < path.EqCondCount && i+1 < len(path.IdxCols); i++ { + result = append(result, make([]*expression.Column, len(path.IdxCols)-i-1)) + copy(result[len(result)-1], path.IdxCols[i+1:]) } } return result diff --git a/planner/core/rule_build_key_info.go b/planner/core/rule_build_key_info.go index 2e2a43f39e2c2..e875b85ee85a5 100644 --- a/planner/core/rule_build_key_info.go +++ b/planner/core/rule_build_key_info.go @@ -227,10 +227,10 @@ func checkIndexCanBeKey(idx *model.IndexInfo, columns []*model.ColumnInfo, schem func (ds *DataSource) BuildKeyInfo(selfSchema *expression.Schema, childSchema []*expression.Schema) { selfSchema.Keys = nil for _, path := range ds.possibleAccessPaths { - if path.isTablePath { + if path.IsTablePath { continue } - if newKey := checkIndexCanBeKey(path.index, ds.Columns, selfSchema); newKey != nil { + if newKey := checkIndexCanBeKey(path.Index, ds.Columns, selfSchema); newKey != nil { selfSchema.Keys = append(selfSchema.Keys, newKey) } } @@ -253,10 +253,10 @@ func (ts *LogicalTableScan) BuildKeyInfo(selfSchema *expression.Schema, childSch func (is *LogicalIndexScan) BuildKeyInfo(selfSchema *expression.Schema, childSchema []*expression.Schema) { selfSchema.Keys = nil for _, path := range is.Source.possibleAccessPaths { - if path.isTablePath { + if path.IsTablePath { continue } - if newKey := checkIndexCanBeKey(path.index, is.Columns, selfSchema); newKey != nil { + if newKey := checkIndexCanBeKey(path.Index, is.Columns, selfSchema); newKey != nil { selfSchema.Keys = append(selfSchema.Keys, newKey) } } diff --git a/planner/core/rule_join_elimination.go b/planner/core/rule_join_elimination.go index c27be9823d3da..6398c19be5939 100644 --- a/planner/core/rule_join_elimination.go +++ b/planner/core/rule_join_elimination.go @@ -126,14 +126,14 @@ func (o *outerJoinEliminator) isInnerJoinKeysContainIndex(innerPlan LogicalPlan, return false, nil } for _, path := range ds.possibleAccessPaths { - if path.isTablePath { + if path.IsTablePath { continue } - if !path.index.Unique { + if !path.Index.Unique { continue } joinKeysContainIndex := true - for _, idxCol := range path.idxCols { + for _, idxCol := range path.IdxCols { if !joinKeys.Contains(idxCol) { joinKeysContainIndex = false break diff --git a/planner/core/rule_max_min_eliminate.go b/planner/core/rule_max_min_eliminate.go index 365144ca43ab8..09e586c0a28c9 100644 --- a/planner/core/rule_max_min_eliminate.go +++ b/planner/core/rule_max_min_eliminate.go @@ -20,6 +20,7 @@ import ( "github.com/pingcap/parser/mysql" "github.com/pingcap/tidb/expression" "github.com/pingcap/tidb/expression/aggregation" + "github.com/pingcap/tidb/planner/util" "github.com/pingcap/tidb/types" "github.com/pingcap/tidb/util/ranger" ) @@ -49,8 +50,8 @@ func (a *maxMinEliminator) composeAggsByInnerJoin(aggs []*LogicalAggregation) (p return } -// checkColCanUseIndex checks whether there is an accessPath satisfy the conditions: -// 1. all of the selection's condition can be pushed down as accessConds of the path. +// checkColCanUseIndex checks whether there is an AccessPath satisfy the conditions: +// 1. all of the selection's condition can be pushed down as AccessConds of the path. // 2. the path can keep order for `col` after pushing down the conditions. func (a *maxMinEliminator) checkColCanUseIndex(plan LogicalPlan, col *expression.Column, conditions []expression.Expression) bool { switch p := plan.(type) { @@ -58,9 +59,9 @@ func (a *maxMinEliminator) checkColCanUseIndex(plan LogicalPlan, col *expression conditions = append(conditions, p.Conditions...) return a.checkColCanUseIndex(p.children[0], col, conditions) case *DataSource: - // Check whether there is an accessPath can use index for col. + // Check whether there is an AccessPath can use index for col. for _, path := range p.possibleAccessPaths { - if path.isTablePath { + if path.IsTablePath { // Since table path can contain accessConds of at most one column, // we only need to check if all of the conditions can be pushed down as accessConds // and `col` is the handle column. @@ -73,13 +74,13 @@ func (a *maxMinEliminator) checkColCanUseIndex(plan LogicalPlan, col *expression } else { // For index paths, we have to check: // 1. whether all of the conditions can be pushed down as accessConds. - // 2. whether the accessPath can satisfy the order property of `col` with these accessConds. - result, err := ranger.DetachCondAndBuildRangeForIndex(p.ctx, conditions, path.fullIdxCols, path.fullIdxColLens) + // 2. whether the AccessPath can satisfy the order property of `col` with these accessConds. + result, err := ranger.DetachCondAndBuildRangeForIndex(p.ctx, conditions, path.FullIdxCols, path.FullIdxColLens) if err != nil || len(result.RemainedConds) != 0 { continue } for i := 0; i <= result.EqCondCount; i++ { - if i < len(path.fullIdxCols) && col.Equal(nil, path.fullIdxCols[i]) { + if i < len(path.FullIdxCols) && col.Equal(nil, path.FullIdxCols[i]) { return true } } @@ -109,7 +110,7 @@ func (a *maxMinEliminator) cloneSubPlans(plan LogicalPlan) LogicalPlan { newDs.schema = p.schema.Clone() newDs.Columns = make([]*model.ColumnInfo, len(p.Columns)) copy(newDs.Columns, p.Columns) - newAccessPaths := make([]*accessPath, 0, len(p.possibleAccessPaths)) + newAccessPaths := make([]*util.AccessPath, 0, len(p.possibleAccessPaths)) for _, path := range p.possibleAccessPaths { newPath := *path newAccessPaths = append(newAccessPaths, &newPath) diff --git a/planner/core/rule_partition_processor.go b/planner/core/rule_partition_processor.go index ae439cbc0153f..0e7aa9500ceae 100644 --- a/planner/core/rule_partition_processor.go +++ b/planner/core/rule_partition_processor.go @@ -18,6 +18,7 @@ import ( "github.com/pingcap/errors" "github.com/pingcap/parser/model" "github.com/pingcap/tidb/expression" + "github.com/pingcap/tidb/planner/util" "github.com/pingcap/tidb/sessionctx" "github.com/pingcap/tidb/table/tables" "github.com/pingcap/tidb/types" @@ -164,7 +165,7 @@ func (s *partitionProcessor) prune(ds *DataSource) (LogicalPlan, error) { newDataSource.baseLogicalPlan = newBaseLogicalPlan(ds.SCtx(), plancodec.TypeTableScan, &newDataSource, ds.blockOffset) newDataSource.isPartition = true newDataSource.physicalTableID = pi.Definitions[i].ID - newDataSource.possibleAccessPaths = make([]*accessPath, len(ds.possibleAccessPaths)) + newDataSource.possibleAccessPaths = make([]*util.AccessPath, len(ds.possibleAccessPaths)) for i := range ds.possibleAccessPaths { newPath := *ds.possibleAccessPaths[i] newDataSource.possibleAccessPaths[i] = &newPath diff --git a/planner/core/stats.go b/planner/core/stats.go index 0a6bf02c1b8a0..0e9aa4cd432f8 100644 --- a/planner/core/stats.go +++ b/planner/core/stats.go @@ -21,6 +21,7 @@ import ( "github.com/pingcap/parser/mysql" "github.com/pingcap/tidb/expression" "github.com/pingcap/tidb/planner/property" + "github.com/pingcap/tidb/planner/util" "github.com/pingcap/tidb/statistics" "github.com/pingcap/tidb/types" "github.com/pingcap/tidb/util/logutil" @@ -137,24 +138,29 @@ func (ds *DataSource) getColumnNDV(colID int64) (ndv float64) { return ndv } -func (ds *DataSource) deriveStatsByFilter(conds expression.CNFExprs) *property.StatsInfo { - if ds.tableStats == nil { - tableStats := &property.StatsInfo{ - RowCount: float64(ds.statisticTable.Count), - Cardinality: make([]float64, len(ds.Columns)), - HistColl: ds.statisticTable.GenerateHistCollFromColumnInfo(ds.Columns, ds.schema.Columns), - StatsVersion: ds.statisticTable.Version, - } - if ds.statisticTable.Pseudo { - tableStats.StatsVersion = statistics.PseudoVersion - } - for i, col := range ds.Columns { - tableStats.Cardinality[i] = ds.getColumnNDV(col.ID) - } - ds.tableStats = tableStats - ds.TblColHists = ds.statisticTable.ID2UniqueID(ds.TblCols) +func (ds *DataSource) initStats() { + if ds.tableStats != nil { + return + } + tableStats := &property.StatsInfo{ + RowCount: float64(ds.statisticTable.Count), + Cardinality: make([]float64, len(ds.Columns)), + HistColl: ds.statisticTable.GenerateHistCollFromColumnInfo(ds.Columns, ds.schema.Columns), + StatsVersion: ds.statisticTable.Version, } - selectivity, nodes, err := ds.tableStats.HistColl.Selectivity(ds.ctx, conds) + if ds.statisticTable.Pseudo { + tableStats.StatsVersion = statistics.PseudoVersion + } + for i, col := range ds.Columns { + tableStats.Cardinality[i] = ds.getColumnNDV(col.ID) + } + ds.tableStats = tableStats + ds.TblColHists = ds.statisticTable.ID2UniqueID(ds.TblCols) +} + +func (ds *DataSource) deriveStatsByFilter(conds expression.CNFExprs, filledPaths []*util.AccessPath) *property.StatsInfo { + ds.initStats() + selectivity, nodes, err := ds.tableStats.HistColl.Selectivity(ds.ctx, conds, filledPaths) if err != nil { logutil.BgLogger().Debug("something wrong happened, use the default selectivity", zap.Error(err)) selectivity = selectionFactor @@ -168,31 +174,38 @@ func (ds *DataSource) deriveStatsByFilter(conds expression.CNFExprs) *property.S // DeriveStats implement LogicalPlan DeriveStats interface. func (ds *DataSource) DeriveStats(childStats []*property.StatsInfo, selfSchema *expression.Schema, childSchema []*expression.Schema) (*property.StatsInfo, error) { + ds.initStats() // PushDownNot here can convert query 'not (a != 1)' to 'a = 1'. for i, expr := range ds.pushedDownConds { ds.pushedDownConds[i] = expression.PushDownNot(ds.ctx, expr) } - ds.stats = ds.deriveStatsByFilter(ds.pushedDownConds) for _, path := range ds.possibleAccessPaths { - if path.isTablePath { + if path.IsTablePath { + continue + } + err := ds.fillIndexPath(path, ds.pushedDownConds) + if err != nil { + return nil, err + } + } + ds.stats = ds.deriveStatsByFilter(ds.pushedDownConds, ds.possibleAccessPaths) + for _, path := range ds.possibleAccessPaths { + if path.IsTablePath { noIntervalRanges, err := ds.deriveTablePathStats(path, ds.pushedDownConds, false) if err != nil { return nil, err } // If we have point or empty range, just remove other possible paths. - if noIntervalRanges || len(path.ranges) == 0 { + if noIntervalRanges || len(path.Ranges) == 0 { ds.possibleAccessPaths[0] = path ds.possibleAccessPaths = ds.possibleAccessPaths[:1] break } continue } - noIntervalRanges, err := ds.deriveIndexPathStats(path, ds.pushedDownConds, false) - if err != nil { - return nil, err - } + noIntervalRanges := ds.deriveIndexPathStats(path, ds.pushedDownConds, false) // If we have empty range, or point range on unique index, just remove other possible paths. - if (noIntervalRanges && path.index.Unique) || len(path.ranges) == 0 { + if (noIntervalRanges && path.Index.Unique) || len(path.Ranges) == 0 { ds.possibleAccessPaths[0] = path ds.possibleAccessPaths = ds.possibleAccessPaths[:1] break @@ -204,7 +217,7 @@ func (ds *DataSource) DeriveStats(childStats []*property.StatsInfo, selfSchema * // If there is an index path, we current do not consider `IndexMergePath`. needConsiderIndexMerge := true for i := 1; i < len(ds.possibleAccessPaths); i++ { - if len(ds.possibleAccessPaths[i].accessConds) != 0 { + if len(ds.possibleAccessPaths[i].AccessConds) != 0 { needConsiderIndexMerge = false break } @@ -243,7 +256,7 @@ func (ts *LogicalTableScan) DeriveStats(childStats []*property.StatsInfo, selfSc // `PushDownNot` function call in multiple `DeriveStats` then. ts.AccessConds[i] = expression.PushDownNot(ts.ctx, expr) } - ts.stats = ts.Source.deriveStatsByFilter(ts.AccessConds) + ts.stats = ts.Source.deriveStatsByFilter(ts.AccessConds, nil) sc := ts.SCtx().GetSessionVars().StmtCtx // ts.Handle could be nil if PK is Handle, and PK column has been pruned. if ts.Handle != nil { @@ -268,7 +281,7 @@ func (is *LogicalIndexScan) DeriveStats(childStats []*property.StatsInfo, selfSc for i, expr := range is.AccessConds { is.AccessConds[i] = expression.PushDownNot(is.ctx, expr) } - is.stats = is.Source.deriveStatsByFilter(is.AccessConds) + is.stats = is.Source.deriveStatsByFilter(is.AccessConds, nil) if len(is.AccessConds) == 0 { is.Ranges = ranger.FullRange() } @@ -294,7 +307,7 @@ func (ds *DataSource) generateIndexMergeOrPaths() { if !ok || sf.FuncName.L != ast.LogicOr { continue } - var partialPaths = make([]*accessPath, 0, usedIndexCount) + var partialPaths = make([]*util.AccessPath, 0, usedIndexCount) dnfItems := expression.FlattenDNFConditions(sf) for _, item := range dnfItems { cnfItems := expression.SplitCNFItems(item) @@ -336,49 +349,50 @@ func (ds *DataSource) isInIndexMergeHints(name string) bool { } // accessPathsForConds generates all possible index paths for conditions. -func (ds *DataSource) accessPathsForConds(conditions []expression.Expression, usedIndexCount int) []*accessPath { - var results = make([]*accessPath, 0, usedIndexCount) +func (ds *DataSource) accessPathsForConds(conditions []expression.Expression, usedIndexCount int) []*util.AccessPath { + var results = make([]*util.AccessPath, 0, usedIndexCount) for i := 0; i < usedIndexCount; i++ { - path := &accessPath{} - if ds.possibleAccessPaths[i].isTablePath { + path := &util.AccessPath{} + if ds.possibleAccessPaths[i].IsTablePath { if !ds.isInIndexMergeHints("primary") { continue } - path.isTablePath = true + path.IsTablePath = true noIntervalRanges, err := ds.deriveTablePathStats(path, conditions, true) if err != nil { logutil.BgLogger().Debug("can not derive statistics of a path", zap.Error(err)) continue } // If we have point or empty range, just remove other possible paths. - if noIntervalRanges || len(path.ranges) == 0 { + if noIntervalRanges || len(path.Ranges) == 0 { results[0] = path results = results[:1] break } } else { - path.index = ds.possibleAccessPaths[i].index - if !ds.isInIndexMergeHints(path.index.Name.L) { + path.Index = ds.possibleAccessPaths[i].Index + if !ds.isInIndexMergeHints(path.Index.Name.L) { continue } - noIntervalRanges, err := ds.deriveIndexPathStats(path, conditions, true) + err := ds.fillIndexPath(path, conditions) if err != nil { logutil.BgLogger().Debug("can not derive statistics of a path", zap.Error(err)) continue } + noIntervalRanges := ds.deriveIndexPathStats(path, conditions, true) // If we have empty range, or point range on unique index, just remove other possible paths. - if (noIntervalRanges && path.index.Unique) || len(path.ranges) == 0 { + if (noIntervalRanges && path.Index.Unique) || len(path.Ranges) == 0 { results[0] = path results = results[:1] break } } - // If accessConds is empty or tableFilter is not empty, we ignore the access path. + // If AccessConds is empty or tableFilter is not empty, we ignore the access path. // Now these conditions are too strict. // For example, a sql `select * from t where a > 1 or (b < 2 and c > 3)` and table `t` with indexes // on a and b separately. we can generate a `IndexMergePath` with table filter `a > 1 or (b < 2 and c > 3)`. // TODO: solve the above case - if len(path.tableFilters) > 0 || len(path.accessConds) == 0 { + if len(path.TableFilters) > 0 || len(path.AccessConds) == 0 { continue } results = append(results, path) @@ -392,15 +406,15 @@ func (ds *DataSource) accessPathsForConds(conditions []expression.Expression, us // with most columns, e.g, filter is c > 1 and the input indexes are c and c_d_e, // the former one is enough, and it is less expensive in execution compared with the latter one. // TODO: improve strategy of the partial path selection -func (ds *DataSource) buildIndexMergePartialPath(indexAccessPaths []*accessPath) *accessPath { +func (ds *DataSource) buildIndexMergePartialPath(indexAccessPaths []*util.AccessPath) *util.AccessPath { if len(indexAccessPaths) == 1 { return indexAccessPaths[0] } maxColsIndex := 0 - maxCols := len(indexAccessPaths[0].idxCols) + maxCols := len(indexAccessPaths[0].IdxCols) for i := 1; i < len(indexAccessPaths); i++ { - current := len(indexAccessPaths[i].idxCols) + current := len(indexAccessPaths[i].IdxCols) if current > maxCols { maxColsIndex = i maxCols = current @@ -410,10 +424,10 @@ func (ds *DataSource) buildIndexMergePartialPath(indexAccessPaths []*accessPath) } // buildIndexMergeOrPath generates one possible IndexMergePath. -func (ds *DataSource) buildIndexMergeOrPath(partialPaths []*accessPath, current int) *accessPath { - indexMergePath := &accessPath{partialIndexPaths: partialPaths} - indexMergePath.tableFilters = append(indexMergePath.tableFilters, ds.pushedDownConds[:current]...) - indexMergePath.tableFilters = append(indexMergePath.tableFilters, ds.pushedDownConds[current+1:]...) +func (ds *DataSource) buildIndexMergeOrPath(partialPaths []*util.AccessPath, current int) *util.AccessPath { + indexMergePath := &util.AccessPath{PartialIndexPaths: partialPaths} + indexMergePath.TableFilters = append(indexMergePath.TableFilters, ds.pushedDownConds[:current]...) + indexMergePath.TableFilters = append(indexMergePath.TableFilters, ds.pushedDownConds[current+1:]...) return indexMergePath } diff --git a/planner/util/path.go b/planner/util/path.go new file mode 100644 index 0000000000000..5f12f9231eeb0 --- /dev/null +++ b/planner/util/path.go @@ -0,0 +1,118 @@ +// Copyright 2019 PingCAP, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// See the License for the specific language governing permissions and +// limitations under the License. + +package util + +import ( + "github.com/pingcap/parser/ast" + "github.com/pingcap/parser/model" + "github.com/pingcap/tidb/expression" + "github.com/pingcap/tidb/kv" + "github.com/pingcap/tidb/types" + "github.com/pingcap/tidb/util/ranger" +) + +// AccessPath indicates the way we access a table: by using single index, or by using multiple indexes, +// or just by using table scan. +type AccessPath struct { + Index *model.IndexInfo + FullIdxCols []*expression.Column + FullIdxColLens []int + IdxCols []*expression.Column + IdxColLens []int + Ranges []*ranger.Range + // CountAfterAccess is the row count after we apply range seek and before we use other filter to filter data. + CountAfterAccess float64 + // CountAfterIndex is the row count after we apply filters on index and before we apply the table filters. + CountAfterIndex float64 + AccessConds []expression.Expression + EqCondCount int + EqOrInCondCount int + IndexFilters []expression.Expression + TableFilters []expression.Expression + // IsTablePath indicates whether this path is table path. + IsTablePath bool + // Forced means this path is generated by `use/force index()`. + Forced bool + // PartialIndexPaths store all index access paths. + // If there are extra filters, store them in TableFilters. + PartialIndexPaths []*AccessPath + + IsDNFCond bool + + StoreType kv.StoreType +} + +// SplitCorColAccessCondFromFilters move the necessary filter in the form of index_col = corrlated_col to access conditions. +func (path *AccessPath) SplitCorColAccessCondFromFilters(eqOrInCount int) (access, remained []expression.Expression) { + access = make([]expression.Expression, len(path.IdxCols)-eqOrInCount) + used := make([]bool, len(path.TableFilters)) + for i := eqOrInCount; i < len(path.IdxCols); i++ { + matched := false + for j, filter := range path.TableFilters { + if used[j] || !isColEqCorColOrConstant(filter, path.IdxCols[i]) { + continue + } + matched = true + access[i-eqOrInCount] = filter + if path.IdxColLens[i] == types.UnspecifiedLength { + used[j] = true + } + break + } + if !matched { + access = access[:i-eqOrInCount] + break + } + } + for i, ok := range used { + if !ok { + remained = append(remained, path.TableFilters[i]) + } + } + return access, remained +} + +// isColEqCorColOrConstant checks if the expression is a eq function that one side is constant or correlated column +// and another is column. +func isColEqCorColOrConstant(filter expression.Expression, col *expression.Column) bool { + f, ok := filter.(*expression.ScalarFunction) + if !ok || f.FuncName.L != ast.EQ { + return false + } + if c, ok := f.GetArgs()[0].(*expression.Column); ok { + if _, ok := f.GetArgs()[1].(*expression.Constant); ok { + if col.Equal(nil, c) { + return true + } + } + if _, ok := f.GetArgs()[1].(*expression.CorrelatedColumn); ok { + if col.Equal(nil, c) { + return true + } + } + } + if c, ok := f.GetArgs()[1].(*expression.Column); ok { + if _, ok := f.GetArgs()[0].(*expression.Constant); ok { + if col.Equal(nil, c) { + return true + } + } + if _, ok := f.GetArgs()[0].(*expression.CorrelatedColumn); ok { + if col.Equal(nil, c) { + return true + } + } + } + return false +} diff --git a/statistics/selectivity.go b/statistics/selectivity.go index fef5c54907d25..d8168003812be 100644 --- a/statistics/selectivity.go +++ b/statistics/selectivity.go @@ -20,6 +20,7 @@ import ( "github.com/pingcap/parser/ast" "github.com/pingcap/parser/mysql" "github.com/pingcap/tidb/expression" + planutil "github.com/pingcap/tidb/planner/util" "github.com/pingcap/tidb/sessionctx" "github.com/pingcap/tidb/types" "github.com/pingcap/tidb/util/ranger" @@ -147,7 +148,7 @@ func isColEqCorCol(filter expression.Expression) *expression.Column { // The definition of selectivity is (row count after filter / row count before filter). // And exprs must be CNF now, in other words, `exprs[0] and exprs[1] and ... and exprs[len - 1]` should be held when you call this. // Currently the time complexity is o(n^2). -func (coll *HistColl) Selectivity(ctx sessionctx.Context, exprs []expression.Expression) (float64, []*StatsNode, error) { +func (coll *HistColl) Selectivity(ctx sessionctx.Context, exprs []expression.Expression, filledPaths []*planutil.AccessPath) (float64, []*StatsNode, error) { // If table's count is zero or conditions are empty, we should return 100% selectivity. if coll.Count == 0 || len(exprs) == 0 { return 1, nil, nil @@ -189,7 +190,7 @@ func (coll *HistColl) Selectivity(ctx sessionctx.Context, exprs []expression.Exp for id, colInfo := range coll.Columns { col := expression.ColInfo2Col(extractedCols, colInfo.Info) if col != nil { - maskCovered, ranges, _, err := getMaskAndRanges(ctx, remainedExprs, ranger.ColumnRangeType, nil, col) + maskCovered, ranges, _, err := getMaskAndRanges(ctx, remainedExprs, ranger.ColumnRangeType, nil, nil, col) if err != nil { return 0, nil, errors.Trace(err) } @@ -211,6 +212,13 @@ func (coll *HistColl) Selectivity(ctx sessionctx.Context, exprs []expression.Exp nodes[len(nodes)-1].Selectivity = cnt / float64(coll.Count) } } + id2Paths := make(map[int64]*planutil.AccessPath) + for _, path := range filledPaths { + if path.IsTablePath { + continue + } + id2Paths[path.Index.ID] = path + } for id, idxInfo := range coll.Indices { idxCols := expression.FindPrefixOfIndex(extractedCols, coll.Idx2ColumnIDs[id]) if len(idxCols) > 0 { @@ -218,7 +226,7 @@ func (coll *HistColl) Selectivity(ctx sessionctx.Context, exprs []expression.Exp for i := 0; i < len(idxCols); i++ { lengths = append(lengths, idxInfo.Info.Columns[i].Length) } - maskCovered, ranges, partCover, err := getMaskAndRanges(ctx, remainedExprs, ranger.IndexRangeType, lengths, idxCols...) + maskCovered, ranges, partCover, err := getMaskAndRanges(ctx, remainedExprs, ranger.IndexRangeType, lengths, id2Paths[idxInfo.ID], idxCols...) if err != nil { return 0, nil, errors.Trace(err) } @@ -259,8 +267,7 @@ func (coll *HistColl) Selectivity(ctx sessionctx.Context, exprs []expression.Exp return ret, nodes, nil } -func getMaskAndRanges(ctx sessionctx.Context, exprs []expression.Expression, rangeType ranger.RangeType, - lengths []int, cols ...*expression.Column) (mask int64, ranges []*ranger.Range, partCover bool, err error) { +func getMaskAndRanges(ctx sessionctx.Context, exprs []expression.Expression, rangeType ranger.RangeType, lengths []int, cachedPath *planutil.AccessPath, cols ...*expression.Column) (mask int64, ranges []*ranger.Range, partCover bool, err error) { sc := ctx.GetSessionVars().StmtCtx isDNF := false var accessConds, remainedConds []expression.Expression @@ -269,9 +276,16 @@ func getMaskAndRanges(ctx sessionctx.Context, exprs []expression.Expression, ran accessConds = ranger.ExtractAccessConditionsForColumn(exprs, cols[0].UniqueID) ranges, err = ranger.BuildColumnRange(accessConds, sc, cols[0].RetType, types.UnspecifiedLength) case ranger.IndexRangeType: + if cachedPath != nil { + ranges, accessConds, remainedConds, isDNF = cachedPath.Ranges, cachedPath.AccessConds, cachedPath.TableFilters, cachedPath.IsDNFCond + break + } var res *ranger.DetachRangeResult res, err = ranger.DetachCondAndBuildRangeForIndex(ctx, exprs, cols, lengths) ranges, accessConds, remainedConds, isDNF = res.Ranges, res.AccessConds, res.RemainedConds, res.IsDNFCond + if err != nil { + return 0, nil, false, err + } default: panic("should never be here") } diff --git a/statistics/selectivity_test.go b/statistics/selectivity_test.go index 109b4df605946..20a56f600ee42 100644 --- a/statistics/selectivity_test.go +++ b/statistics/selectivity_test.go @@ -301,12 +301,12 @@ func (s *testStatsSuite) TestSelectivity(c *C) { histColl := statsTbl.GenerateHistCollFromColumnInfo(ds.Columns, ds.Schema().Columns) - ratio, _, err := histColl.Selectivity(sctx, sel.Conditions) + ratio, _, err := histColl.Selectivity(sctx, sel.Conditions, nil) c.Assert(err, IsNil, comment) c.Assert(math.Abs(ratio-tt.selectivity) < eps, IsTrue, Commentf("for %s, needed: %v, got: %v", tt.exprs, tt.selectivity, ratio)) histColl.Count *= 10 - ratio, _, err = histColl.Selectivity(sctx, sel.Conditions) + ratio, _, err = histColl.Selectivity(sctx, sel.Conditions, nil) c.Assert(err, IsNil, comment) c.Assert(math.Abs(ratio-tt.selectivity) < eps, IsTrue, Commentf("for %s, needed: %v, got: %v", tt.exprs, tt.selectivity, ratio)) } @@ -531,7 +531,7 @@ func BenchmarkSelectivity(b *testing.B) { b.Run("Selectivity", func(b *testing.B) { b.ResetTimer() for i := 0; i < b.N; i++ { - _, _, err := statsTbl.Selectivity(sctx, p.(plannercore.LogicalPlan).Children()[0].(*plannercore.LogicalSelection).Conditions) + _, _, err := statsTbl.Selectivity(sctx, p.(plannercore.LogicalPlan).Children()[0].(*plannercore.LogicalSelection).Conditions, nil) c.Assert(err, IsNil) } b.ReportAllocs()