Skip to content

enhance: simplify compaction tasks to reduce their memory overhead #39121

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 0 additions & 5 deletions internal/datacoord/compaction.go
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,6 @@ import (
"time"

"github.com/cockroachdb/errors"
"go.opentelemetry.io/otel"
"go.uber.org/zap"

"github.com/milvus-io/milvus-proto/go-api/v2/commonpb"
Expand Down Expand Up @@ -592,8 +591,6 @@ func (c *compactionPlanHandler) removeTasksByChannel(channel string) {
}

func (c *compactionPlanHandler) submitTask(t CompactionTask) error {
_, span := otel.Tracer(typeutil.DataCoordRole).Start(context.Background(), fmt.Sprintf("Compaction-%s", t.GetTaskProto().GetType()))
t.SetSpan(span)
if err := c.queueTasks.Enqueue(t); err != nil {
return err
}
Expand All @@ -603,8 +600,6 @@ func (c *compactionPlanHandler) submitTask(t CompactionTask) error {

// restoreTask used to restore Task from etcd
func (c *compactionPlanHandler) restoreTask(t CompactionTask) {
_, span := otel.Tracer(typeutil.DataCoordRole).Start(context.Background(), fmt.Sprintf("Compaction-%s", t.GetTaskProto().GetType()))
t.SetSpan(span)
c.executingGuard.Lock()
c.executingTasks[t.GetTaskProto().GetPlanID()] = t
c.executingGuard.Unlock()
Expand Down
5 changes: 0 additions & 5 deletions internal/datacoord/compaction_task.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,8 +17,6 @@
package datacoord

import (
"go.opentelemetry.io/otel/trace"

"github.com/milvus-io/milvus/pkg/proto/datapb"
)

Expand All @@ -40,13 +38,10 @@ type CompactionTask interface {

SetTask(*datapb.CompactionTask)
GetTaskProto() *datapb.CompactionTask
SetPlan(plan *datapb.CompactionPlan)
ShadowClone(opts ...compactionTaskOpt) *datapb.CompactionTask

SetNodeID(UniqueID) error
NeedReAssignNodeID() bool
GetSpan() trace.Span
SetSpan(trace.Span)
SaveTaskMeta() error
}

Expand Down
21 changes: 1 addition & 20 deletions internal/datacoord/compaction_task_clustering.go
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,6 @@ import (

"github.com/cockroachdb/errors"
"github.com/samber/lo"
"go.opentelemetry.io/otel/trace"
"go.uber.org/atomic"
"go.uber.org/zap"
"google.golang.org/protobuf/proto"
Expand All @@ -52,15 +51,13 @@ type clusteringCompactionTask struct {
plan *datapb.CompactionPlan
result *datapb.CompactionPlanResult

span trace.Span
allocator allocator.Allocator
meta CompactionMeta
sessions session.DataNodeManager
handler Handler
analyzeScheduler *taskScheduler

maxRetryTimes int32
slotUsage int64
}

func (t *clusteringCompactionTask) GetTaskProto() *datapb.CompactionTask {
Expand All @@ -79,7 +76,6 @@ func newClusteringCompactionTask(t *datapb.CompactionTask, allocator allocator.A
handler: handler,
analyzeScheduler: analyzeScheduler,
maxRetryTimes: 3,
slotUsage: paramtable.Get().DataCoordCfg.ClusteringCompactionSlotUsage.GetAsInt64(),
}
task.taskProto.Store(t)
return task
Expand Down Expand Up @@ -272,7 +268,6 @@ func (t *clusteringCompactionTask) processExecuting() error {
switch result.GetState() {
case datapb.CompactionTaskState_completed:
t.result = result
result := t.result
if len(result.GetSegments()) == 0 {
log.Warn("illegal compaction results, this should not happen")
return merr.WrapErrCompactionResult("compaction result is empty")
Expand Down Expand Up @@ -766,24 +761,10 @@ func (t *clusteringCompactionTask) GetResult() *datapb.CompactionPlanResult {
return t.result
}

// GetSpan returns the trace span stored on this task (the span field).
func (t *clusteringCompactionTask) GetSpan() trace.Span {
return t.span
}

// EndSpan ends the trace span attached to this task.
// It does nothing when no span is stored on the task.
func (t *clusteringCompactionTask) EndSpan() {
	if t.span == nil {
		return
	}
	t.span.End()
}

// SetResult stores result as the task's compaction plan result, replacing
// whatever was stored before. The value is later returned by GetResult.
func (t *clusteringCompactionTask) SetResult(result *datapb.CompactionPlanResult) {
t.result = result
}

// SetSpan stores span on the task so that GetSpan can return it and EndSpan
// can end it.
func (t *clusteringCompactionTask) SetSpan(span trace.Span) {
t.span = span
}

// SetPlan stores plan as the task's compaction plan, replacing any plan
// stored before.
func (t *clusteringCompactionTask) SetPlan(plan *datapb.CompactionPlan) {
t.plan = plan
}
Expand All @@ -805,5 +786,5 @@ func (t *clusteringCompactionTask) NeedReAssignNodeID() bool {
}

func (t *clusteringCompactionTask) GetSlotUsage() int64 {
return t.slotUsage
return paramtable.Get().DataCoordCfg.ClusteringCompactionSlotUsage.GetAsInt64()
}
52 changes: 6 additions & 46 deletions internal/datacoord/compaction_task_l0.go
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,6 @@ import (

"github.com/cockroachdb/errors"
"github.com/samber/lo"
"go.opentelemetry.io/otel/trace"
"go.uber.org/atomic"
"go.uber.org/zap"
"google.golang.org/protobuf/proto"
Expand All @@ -42,15 +41,10 @@ var _ CompactionTask = (*l0CompactionTask)(nil)

// l0CompactionTask is the CompactionTask implementation whose slot usage is
// configured by DataCoordCfg.L0DeleteCompactionSlotUsage. It bundles the
// persisted task proto with the collaborators needed to drive the task.
type l0CompactionTask struct {
// taskProto holds the current *datapb.CompactionTask; it is written with
// Store and read back with Load plus a type assertion.
taskProto atomic.Value // *datapb.CompactionTask
// plan is the compaction plan built by BuildCompactionRequest (or set via
// SetPlan) and handed to sessions.Compaction.
plan *datapb.CompactionPlan
// result is the plan result recorded when the reported state is completed;
// saveSegmentMeta and the segment-size metrics read it.
result *datapb.CompactionPlanResult

// span is the trace span attached through SetSpan and ended by EndSpan.
span trace.Span
// allocator, sessions and meta are supplied by newL0CompactionTask.
allocator allocator.Allocator
// sessions is used to send the compaction plan to the assigned node.
sessions session.DataNodeManager
// meta is used to persist the task (SaveCompactionTask) and to update
// segment metadata.
meta CompactionMeta

// slotUsage is read once from the L0DeleteCompactionSlotUsage parameter
// at construction time and returned by GetSlotUsage.
slotUsage int64
}

func (t *l0CompactionTask) GetTaskProto() *datapb.CompactionTask {
Expand All @@ -66,7 +60,6 @@ func newL0CompactionTask(t *datapb.CompactionTask, allocator allocator.Allocator
allocator: allocator,
meta: meta,
sessions: session,
slotUsage: paramtable.Get().DataCoordCfg.L0DeleteCompactionSlotUsage.GetAsInt64(),
}
task.taskProto.Store(t)
return task
Expand Down Expand Up @@ -96,8 +89,7 @@ func (t *l0CompactionTask) processPipelining() bool {
}

log := log.With(zap.Int64("triggerID", t.GetTaskProto().GetTriggerID()), zap.Int64("nodeID", t.GetTaskProto().GetNodeID()))
var err error
t.plan, err = t.BuildCompactionRequest()
plan, err := t.BuildCompactionRequest()
if err != nil {
log.Warn("l0CompactionTask failed to build compaction request", zap.Error(err))
err = t.updateAndSaveTaskMeta(setState(datapb.CompactionTaskState_failed), setFailReason(err.Error()))
Expand All @@ -109,7 +101,7 @@ func (t *l0CompactionTask) processPipelining() bool {
return t.processFailed()
}

err = t.sessions.Compaction(context.TODO(), t.GetTaskProto().GetNodeID(), t.GetPlan())
err = t.sessions.Compaction(context.TODO(), t.GetTaskProto().GetNodeID(), plan)
if err != nil {
log.Warn("l0CompactionTask failed to notify compaction tasks to DataNode", zap.Int64("planID", t.GetTaskProto().GetPlanID()), zap.Error(err))
t.updateAndSaveTaskMeta(setState(datapb.CompactionTaskState_pipelining), setNodeID(NullNodeID))
Expand All @@ -132,8 +124,7 @@ func (t *l0CompactionTask) processExecuting() bool {
}
switch result.GetState() {
case datapb.CompactionTaskState_completed:
t.result = result
if err := t.saveSegmentMeta(); err != nil {
if err := t.saveSegmentMeta(result); err != nil {
log.Warn("l0CompactionTask failed to save segment meta", zap.Error(err))
return false
}
Expand All @@ -142,6 +133,7 @@ func (t *l0CompactionTask) processExecuting() bool {
log.Warn("l0CompactionTask failed to save task meta_saved state", zap.Error(err))
return false
}
UpdateCompactionSegmentSizeMetrics(result.GetSegments())
return t.processMetaSaved()
case datapb.CompactionTaskState_failed:
if err := t.updateAndSaveTaskMeta(setState(datapb.CompactionTaskState_failed)); err != nil {
Expand Down Expand Up @@ -173,7 +165,6 @@ func (t *l0CompactionTask) processCompleted() bool {
}

t.resetSegmentCompacting()
UpdateCompactionSegmentSizeMetrics(t.result.GetSegments())
task := t.taskProto.Load().(*datapb.CompactionTask)
log.Info("l0CompactionTask processCompleted done", zap.Int64("planID", task.GetPlanID()),
zap.Duration("costs", time.Duration(task.GetEndTime()-task.GetStartTime())*time.Second))
Expand Down Expand Up @@ -212,40 +203,10 @@ func (t *l0CompactionTask) Clean() bool {
return t.doClean() == nil
}

// GetResult returns the compaction plan result stored on this task (the
// result field).
func (t *l0CompactionTask) GetResult() *datapb.CompactionPlanResult {
return t.result
}

// SetResult stores result as the task's compaction plan result, replacing
// whatever was stored before.
func (t *l0CompactionTask) SetResult(result *datapb.CompactionPlanResult) {
t.result = result
}

// SetTask replaces the task proto held in taskProto by storing task into the
// atomic value.
func (t *l0CompactionTask) SetTask(task *datapb.CompactionTask) {
t.taskProto.Store(task)
}

// GetSpan returns the trace span stored on this task (the span field).
func (t *l0CompactionTask) GetSpan() trace.Span {
return t.span
}

// SetSpan stores span on the task so that GetSpan can return it and EndSpan
// can end it.
func (t *l0CompactionTask) SetSpan(span trace.Span) {
t.span = span
}

// EndSpan ends the trace span attached to this task.
// It does nothing when no span is stored on the task.
func (t *l0CompactionTask) EndSpan() {
	if t.span == nil {
		return
	}
	t.span.End()
}

// SetPlan stores plan as the task's compaction plan, replacing any plan
// stored before. The value is later returned by GetPlan.
func (t *l0CompactionTask) SetPlan(plan *datapb.CompactionPlan) {
t.plan = plan
}

// GetPlan returns the compaction plan stored on this task (the plan field).
func (t *l0CompactionTask) GetPlan() *datapb.CompactionPlan {
return t.plan
}

// GetLabel returns the task label, formatted as "<partitionID>-<channel>"
// from the current task proto.
func (t *l0CompactionTask) GetLabel() string {
	// PartitionID is read as a plain field while the channel goes through its
	// getter, exactly as before; the proto is fetched once per value.
	partitionID := t.GetTaskProto().PartitionID
	channel := t.GetTaskProto().GetChannel()
	return fmt.Sprintf("%d-%s", partitionID, channel)
}
Expand Down Expand Up @@ -373,8 +334,7 @@ func (t *l0CompactionTask) saveTaskMeta(task *datapb.CompactionTask) error {
return t.meta.SaveCompactionTask(context.TODO(), task)
}

func (t *l0CompactionTask) saveSegmentMeta() error {
result := t.result
func (t *l0CompactionTask) saveSegmentMeta(result *datapb.CompactionPlanResult) error {
var operators []UpdateOperator
for _, seg := range result.GetSegments() {
operators = append(operators, AddBinlogsOperator(seg.GetSegmentID(), nil, nil, seg.GetDeltalogs(), nil))
Expand All @@ -392,5 +352,5 @@ func (t *l0CompactionTask) saveSegmentMeta() error {
}

func (t *l0CompactionTask) GetSlotUsage() int64 {
return t.slotUsage
return paramtable.Get().DataCoordCfg.L0DeleteCompactionSlotUsage.GetAsInt64()
}
40 changes: 0 additions & 40 deletions internal/datacoord/compaction_task_l0_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,6 @@ import (
"github.com/samber/lo"
"github.com/stretchr/testify/mock"
"github.com/stretchr/testify/suite"
"go.opentelemetry.io/otel/trace"

"github.com/milvus-io/milvus-proto/go-api/v2/commonpb"
"github.com/milvus-io/milvus/internal/datacoord/allocator"
Expand Down Expand Up @@ -444,7 +443,6 @@ func (s *L0CompactionTaskSuite) TestPorcessStateTrans() {
t := s.generateTestL0Task(datapb.CompactionTaskState_meta_saved)
t.updateAndSaveTaskMeta(setNodeID(100))
s.Require().True(t.GetTaskProto().GetNodeID() > 0)
t.result = &datapb.CompactionPlanResult{}

s.mockMeta.EXPECT().SetSegmentsCompacting(mock.Anything, mock.Anything, false).RunAndReturn(func(ctx context.Context, segIDs []int64, isCompacting bool) {
s.ElementsMatch(segIDs, t.GetTaskProto().GetInputSegments())
Expand All @@ -461,7 +459,6 @@ func (s *L0CompactionTaskSuite) TestPorcessStateTrans() {
t := s.generateTestL0Task(datapb.CompactionTaskState_meta_saved)
t.updateAndSaveTaskMeta(setNodeID(100))
s.Require().True(t.GetTaskProto().GetNodeID() > 0)
t.result = &datapb.CompactionPlanResult{}

s.mockMeta.EXPECT().SaveCompactionTask(mock.Anything, mock.Anything).Return(errors.New("mock error")).Once()

Expand All @@ -475,7 +472,6 @@ func (s *L0CompactionTaskSuite) TestPorcessStateTrans() {
t := s.generateTestL0Task(datapb.CompactionTaskState_completed)
t.updateAndSaveTaskMeta(setNodeID(100))
s.Require().True(t.GetTaskProto().GetNodeID() > 0)
t.result = &datapb.CompactionPlanResult{}
s.mockSessMgr.EXPECT().DropCompactionPlan(t.GetTaskProto().GetNodeID(), mock.Anything).Return(errors.New("mock error")).Once()
s.mockMeta.EXPECT().SetSegmentsCompacting(mock.Anything, mock.Anything, false).RunAndReturn(func(ctx context.Context, segIDs []int64, isCompacting bool) {
s.ElementsMatch(segIDs, t.GetTaskProto().GetInputSegments())
Expand All @@ -491,7 +487,6 @@ func (s *L0CompactionTaskSuite) TestPorcessStateTrans() {
t := s.generateTestL0Task(datapb.CompactionTaskState_completed)
t.updateAndSaveTaskMeta(setNodeID(100))
s.Require().True(t.GetTaskProto().GetNodeID() > 0)
t.result = &datapb.CompactionPlanResult{}
s.mockSessMgr.EXPECT().DropCompactionPlan(t.GetTaskProto().GetNodeID(), mock.Anything).Return(nil).Once()
s.mockMeta.EXPECT().SetSegmentsCompacting(mock.Anything, mock.Anything, false).RunAndReturn(func(ctx context.Context, segIDs []int64, isCompacting bool) {
s.ElementsMatch(segIDs, t.GetTaskProto().GetInputSegments())
Expand Down Expand Up @@ -531,38 +526,3 @@ func (s *L0CompactionTaskSuite) TestPorcessStateTrans() {
s.True(got)
})
}

// TestSetterGetter exercises the plain accessors of l0CompactionTask: span,
// result, label, start time, task and plan, plus SetNodeID in a sub-test.
func (s *L0CompactionTaskSuite) TestSetterGetter() {
// Saving task meta through the mocked meta always succeeds in this test.
s.mockMeta.EXPECT().SaveCompactionTask(mock.Anything, mock.Anything).Return(nil)
t := s.generateTestL0Task(datapb.CompactionTaskState_pipelining)

// The generated task is expected to carry no span, and EndSpan must not
// panic in that state.
span := t.GetSpan()
s.Nil(span)
s.NotPanics(t.EndSpan)

// With a span attached, EndSpan must still not panic.
t.SetSpan(trace.SpanFromContext(context.TODO()))
s.NotPanics(t.EndSpan)

// The result starts out nil and is returned once it has been set.
rst := t.GetResult()
s.Nil(rst)
t.SetResult(&datapb.CompactionPlanResult{PlanID: 19530})
s.NotNil(t.GetResult())

// GetLabel formats the label as "<partitionID>-<channel>".
label := t.GetLabel()
s.Equal("10-ch-1", label)

// Option-based updates are reflected in the task proto.
t.updateAndSaveTaskMeta(setStartTime(100))
s.EqualValues(100, t.GetTaskProto().GetStartTime())

// GetPlan returns the plan stored by SetPlan.
t.SetTask(nil)
t.SetPlan(&datapb.CompactionPlan{PlanID: 19530})
s.NotNil(t.GetPlan())

s.Run("set NodeID", func() {
// Uses a fresh task so the SetTask(nil) above does not affect it.
t := s.generateTestL0Task(datapb.CompactionTaskState_pipelining)

s.mockMeta.EXPECT().SaveCompactionTask(mock.Anything, mock.Anything).Return(nil)
t.SetNodeID(1000)
s.EqualValues(1000, t.GetTaskProto().GetNodeID())
})
}
Loading
Loading