diff --git a/README.md b/README.md index 83976437..55ac352f 100644 --- a/README.md +++ b/README.md @@ -5,8 +5,10 @@ Source Transformer, Functions, Sinks or SideInputs in Golang. - Implement [User Defined Sources](https://pkg.go.dev/github.com/numaproj/numaflow-go/pkg/sourcer) - Implement [User Defined Source Transformers](https://pkg.go.dev/github.com/numaproj/numaflow-go/pkg/sourcetransformer) -- Implement [User Defined Functions](https://pkg.go.dev/github.com/numaproj/numaflow-go/pkg/function) -- Implement [User Defined Sinks](https://pkg.go.dev/github.com/numaproj/numaflow-go/pkg/sink) +- Implement User Defined Functions + - [Map](https://pkg.go.dev/github.com/numaproj/numaflow-go/pkg/mapper) + - [Reduce](https://pkg.go.dev/github.com/numaproj/numaflow-go/pkg/reducer) +- Implement [User Defined Sinks](https://pkg.go.dev/github.com/numaproj/numaflow-go/pkg/sinker) - Implement [User Defined SideInputs](https://pkg.go.dev/github.com/numaproj/numaflow-go/pkg/sideinput) ## Development @@ -15,4 +17,4 @@ Source Transformer, Functions, Sinks or SideInputs in Golang. `make test` - Run the tests. `make proto`- Regenerate the protobuf files from the [proto files](https://github.com/numaproj/numaflow/tree/main/pkg/apis/proto) defined in [numaproj/numaflow](https://github.com/numaproj/numaflow) repository. -`make proto ORG=xxx PROJECT=xxx BRANCH=xxx` - Regenerate the protobuf files from specified github repository. +`make proto ORG=xxx PROJECT=xxx BRANCH=xxx` - Regenerate the protobuf files from specified github repository. 
Default values: `ORG=numaproj PROJECT=numaflow BRANCH=main` diff --git a/go.mod b/go.mod index a6ed38f4..423fb94f 100644 --- a/go.mod +++ b/go.mod @@ -5,6 +5,7 @@ go 1.20 require ( github.com/golang/mock v1.6.0 github.com/stretchr/testify v1.8.1 + go.uber.org/atomic v1.11.0 golang.org/x/net v0.9.0 golang.org/x/sync v0.1.0 google.golang.org/grpc v1.57.0 diff --git a/go.sum b/go.sum index 90eb1d42..c20884dc 100644 --- a/go.sum +++ b/go.sum @@ -26,6 +26,8 @@ github.com/stretchr/testify v1.8.0/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO github.com/stretchr/testify v1.8.1 h1:w7B6lhMri9wdJUVmEZPGGhZzrYTPvgJArz7wNPgYKsk= github.com/stretchr/testify v1.8.1/go.mod h1:w2LPCIKwWwSfY2zedu0+kehJoqGctiVI29o6fzry7u4= github.com/yuin/goldmark v1.3.5/go.mod h1:mwnBkeHKe2W/ZEtQ+71ViKU8L12m81fl3OWwC1Zlc8k= +go.uber.org/atomic v1.11.0 h1:ZvwS0R+56ePWxUNi+Atn9dWONBPp/AUETXlHW0DxSjE= +go.uber.org/atomic v1.11.0/go.mod h1:LUxbIzbOniOlMKjJjyPfpl4v+PKK2cNJn91OQbhoJI0= golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= golang.org/x/crypto v0.0.0-20191011191535-87dc89f01550/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI= golang.org/x/mod v0.4.2/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= diff --git a/pkg/apis/proto/reduce/v1/reduce.proto b/pkg/apis/proto/reduce/v1/reduce.proto new file mode 100644 index 00000000..2b2e071d --- /dev/null +++ b/pkg/apis/proto/reduce/v1/reduce.proto @@ -0,0 +1,81 @@ +syntax = "proto3"; + +option go_package = "github.com/numaproj/numaflow-go/pkg/apis/proto/reduce/v1"; + +import "google/protobuf/empty.proto"; +import "google/protobuf/timestamp.proto"; + + +package reduce.v1; + +service Reduce { + // ReduceFn applies a reduce function to a request stream. + rpc ReduceFn(stream ReduceRequest) returns (stream ReduceResponse); + + // IsReady is the heartbeat endpoint for gRPC. 
+ rpc IsReady(google.protobuf.Empty) returns (ReadyResponse); +} + +/** + * ReduceRequest represents a request element. + */ +message ReduceRequest { + // WindowOperation represents a window operation. + // For Aligned windows, OPEN, APPEND and CLOSE events are sent. + message WindowOperation { + enum Event { + OPEN = 0; + CLOSE = 1; + APPEND = 4; + } + + Event event = 1; + repeated Window windows = 2; + } + + // Payload represents a payload element. + message Payload { + repeated string keys = 1; + bytes value = 2; + google.protobuf.Timestamp event_time = 3; + google.protobuf.Timestamp watermark = 4; + } + + Payload payload = 1; + WindowOperation operation = 2; +} + +// Window represents a window. +// Since the client doesn't track keys, window doesn't have a keys field. +message Window { + google.protobuf.Timestamp start = 1; + google.protobuf.Timestamp end = 2; + string slot = 3; +} + +/** + * ReduceResponse represents a response element. + */ +message ReduceResponse { + // Result represents a result element. It contains the result of the reduce function. + message Result { + repeated string keys = 1; + bytes value = 2; + repeated string tags = 3; + } + + Result result = 1; + + // window represents a window to which the result belongs. + Window window = 2; + + // EOF represents the end of the response for a window. + bool EOF = 3; +} + +/** + * ReadyResponse is the health check result. 
+ */ +message ReadyResponse { + bool ready = 1; +} \ No newline at end of file diff --git a/pkg/apis/proto/sessionreduce/v1/mockgen.go b/pkg/apis/proto/sessionreduce/v1/mockgen.go new file mode 100644 index 00000000..53aa925b --- /dev/null +++ b/pkg/apis/proto/sessionreduce/v1/mockgen.go @@ -0,0 +1,3 @@ +package v1 + +//go:generate mockgen -destination sessionreducemock/sessionreducemock.go -package sessionreducemock github.com/numaproj/numaflow-go/pkg/apis/proto/sessionreduce/v1 SessionReduceClient,SessionReduce_SessionReduceFnClient diff --git a/pkg/apis/proto/sessionreduce/v1/sessionreduce.proto b/pkg/apis/proto/sessionreduce/v1/sessionreduce.proto new file mode 100644 index 00000000..07d354e6 --- /dev/null +++ b/pkg/apis/proto/sessionreduce/v1/sessionreduce.proto @@ -0,0 +1,84 @@ +syntax = "proto3"; + +option go_package = "github.com/numaproj/numaflow-go/pkg/apis/proto/reducestream/v1"; + +import "google/protobuf/empty.proto"; +import "google/protobuf/timestamp.proto"; + + +package sessionreduce.v1; + +service SessionReduce { + // SessionReduceFn applies a reduce function to a request stream. + rpc SessionReduceFn(stream SessionReduceRequest) returns (stream SessionReduceResponse); + + // IsReady is the heartbeat endpoint for gRPC. + rpc IsReady(google.protobuf.Empty) returns (ReadyResponse); +} + +// KeyedWindow represents a window with keys. +// Since the client tracks the keys, we use a keyed window. +message KeyedWindow { + google.protobuf.Timestamp start = 1; + google.protobuf.Timestamp end = 2; + string slot = 3; + repeated string keys = 4; +} + +/** + * SessionReduceRequest represents a request element. + */ +message SessionReduceRequest { + // WindowOperation represents a window operation. + // For session (unaligned) windows, the event can be one of OPEN, CLOSE, EXPAND, MERGE and APPEND. 
+ message WindowOperation { + enum Event { + OPEN = 0; + CLOSE = 1; + EXPAND = 2; + MERGE = 3; + APPEND = 4; + } + + Event event = 1; + repeated KeyedWindow keyedWindows = 2; + } + + // Payload represents a payload element. + message Payload { + repeated string keys = 1; + bytes value = 2; + google.protobuf.Timestamp event_time = 3; + google.protobuf.Timestamp watermark = 4; + } + + Payload payload = 1; + WindowOperation operation = 2; +} + +/** + * SessionReduceResponse represents a response element. + */ +message SessionReduceResponse { + // Result represents a result element. It contains the result of the reduce function. + message Result { + repeated string keys = 1; + bytes value = 2; + repeated string tags = 3; + } + + Result result = 1; + + // keyedWindow represents a window to which the result belongs. + KeyedWindow keyedWindow = 2; + + // EOF represents the end of the response for a window. + bool EOF = 3; +} + +/** + * ReadyResponse is the health check result. + */ +message ReadyResponse { + bool ready = 1; +} \ No newline at end of file diff --git a/pkg/apis/proto/sessionreduce/v1/sessionreducemock/sessionreducemock.go b/pkg/apis/proto/sessionreduce/v1/sessionreducemock/sessionreducemock.go new file mode 100644 index 00000000..d1572efb --- /dev/null +++ b/pkg/apis/proto/sessionreduce/v1/sessionreducemock/sessionreducemock.go @@ -0,0 +1,216 @@ +// Code generated by MockGen. DO NOT EDIT. +// Source: github.com/numaproj/numaflow-go/pkg/apis/proto/sessionreduce/v1 (interfaces: SessionReduceClient,SessionReduce_SessionReduceFnClient) + +// Package sessionreducemock is a generated GoMock package. 
+package sessionreducemock + +import ( + context "context" + reflect "reflect" + + gomock "github.com/golang/mock/gomock" + v1 "github.com/numaproj/numaflow-go/pkg/apis/proto/sessionreduce/v1" + grpc "google.golang.org/grpc" + metadata "google.golang.org/grpc/metadata" + emptypb "google.golang.org/protobuf/types/known/emptypb" +) + +// MockSessionReduceClient is a mock of SessionReduceClient interface. +type MockSessionReduceClient struct { + ctrl *gomock.Controller + recorder *MockSessionReduceClientMockRecorder +} + +// MockSessionReduceClientMockRecorder is the mock recorder for MockSessionReduceClient. +type MockSessionReduceClientMockRecorder struct { + mock *MockSessionReduceClient +} + +// NewMockSessionReduceClient creates a new mock instance. +func NewMockSessionReduceClient(ctrl *gomock.Controller) *MockSessionReduceClient { + mock := &MockSessionReduceClient{ctrl: ctrl} + mock.recorder = &MockSessionReduceClientMockRecorder{mock} + return mock +} + +// EXPECT returns an object that allows the caller to indicate expected use. +func (m *MockSessionReduceClient) EXPECT() *MockSessionReduceClientMockRecorder { + return m.recorder +} + +// IsReady mocks base method. +func (m *MockSessionReduceClient) IsReady(arg0 context.Context, arg1 *emptypb.Empty, arg2 ...grpc.CallOption) (*v1.ReadyResponse, error) { + m.ctrl.T.Helper() + varargs := []interface{}{arg0, arg1} + for _, a := range arg2 { + varargs = append(varargs, a) + } + ret := m.ctrl.Call(m, "IsReady", varargs...) + ret0, _ := ret[0].(*v1.ReadyResponse) + ret1, _ := ret[1].(error) + return ret0, ret1 +} + +// IsReady indicates an expected call of IsReady. +func (mr *MockSessionReduceClientMockRecorder) IsReady(arg0, arg1 interface{}, arg2 ...interface{}) *gomock.Call { + mr.mock.ctrl.T.Helper() + varargs := append([]interface{}{arg0, arg1}, arg2...) + return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "IsReady", reflect.TypeOf((*MockSessionReduceClient)(nil).IsReady), varargs...) 
+} + +// SessionReduceFn mocks base method. +func (m *MockSessionReduceClient) SessionReduceFn(arg0 context.Context, arg1 ...grpc.CallOption) (v1.SessionReduce_SessionReduceFnClient, error) { + m.ctrl.T.Helper() + varargs := []interface{}{arg0} + for _, a := range arg1 { + varargs = append(varargs, a) + } + ret := m.ctrl.Call(m, "SessionReduceFn", varargs...) + ret0, _ := ret[0].(v1.SessionReduce_SessionReduceFnClient) + ret1, _ := ret[1].(error) + return ret0, ret1 +} + +// SessionReduceFn indicates an expected call of SessionReduceFn. +func (mr *MockSessionReduceClientMockRecorder) SessionReduceFn(arg0 interface{}, arg1 ...interface{}) *gomock.Call { + mr.mock.ctrl.T.Helper() + varargs := append([]interface{}{arg0}, arg1...) + return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "SessionReduceFn", reflect.TypeOf((*MockSessionReduceClient)(nil).SessionReduceFn), varargs...) +} + +// MockSessionReduce_SessionReduceFnClient is a mock of SessionReduce_SessionReduceFnClient interface. +type MockSessionReduce_SessionReduceFnClient struct { + ctrl *gomock.Controller + recorder *MockSessionReduce_SessionReduceFnClientMockRecorder +} + +// MockSessionReduce_SessionReduceFnClientMockRecorder is the mock recorder for MockSessionReduce_SessionReduceFnClient. +type MockSessionReduce_SessionReduceFnClientMockRecorder struct { + mock *MockSessionReduce_SessionReduceFnClient +} + +// NewMockSessionReduce_SessionReduceFnClient creates a new mock instance. +func NewMockSessionReduce_SessionReduceFnClient(ctrl *gomock.Controller) *MockSessionReduce_SessionReduceFnClient { + mock := &MockSessionReduce_SessionReduceFnClient{ctrl: ctrl} + mock.recorder = &MockSessionReduce_SessionReduceFnClientMockRecorder{mock} + return mock +} + +// EXPECT returns an object that allows the caller to indicate expected use. +func (m *MockSessionReduce_SessionReduceFnClient) EXPECT() *MockSessionReduce_SessionReduceFnClientMockRecorder { + return m.recorder +} + +// CloseSend mocks base method. 
+func (m *MockSessionReduce_SessionReduceFnClient) CloseSend() error { + m.ctrl.T.Helper() + ret := m.ctrl.Call(m, "CloseSend") + ret0, _ := ret[0].(error) + return ret0 +} + +// CloseSend indicates an expected call of CloseSend. +func (mr *MockSessionReduce_SessionReduceFnClientMockRecorder) CloseSend() *gomock.Call { + mr.mock.ctrl.T.Helper() + return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "CloseSend", reflect.TypeOf((*MockSessionReduce_SessionReduceFnClient)(nil).CloseSend)) +} + +// Context mocks base method. +func (m *MockSessionReduce_SessionReduceFnClient) Context() context.Context { + m.ctrl.T.Helper() + ret := m.ctrl.Call(m, "Context") + ret0, _ := ret[0].(context.Context) + return ret0 +} + +// Context indicates an expected call of Context. +func (mr *MockSessionReduce_SessionReduceFnClientMockRecorder) Context() *gomock.Call { + mr.mock.ctrl.T.Helper() + return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "Context", reflect.TypeOf((*MockSessionReduce_SessionReduceFnClient)(nil).Context)) +} + +// Header mocks base method. +func (m *MockSessionReduce_SessionReduceFnClient) Header() (metadata.MD, error) { + m.ctrl.T.Helper() + ret := m.ctrl.Call(m, "Header") + ret0, _ := ret[0].(metadata.MD) + ret1, _ := ret[1].(error) + return ret0, ret1 +} + +// Header indicates an expected call of Header. +func (mr *MockSessionReduce_SessionReduceFnClientMockRecorder) Header() *gomock.Call { + mr.mock.ctrl.T.Helper() + return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "Header", reflect.TypeOf((*MockSessionReduce_SessionReduceFnClient)(nil).Header)) +} + +// Recv mocks base method. +func (m *MockSessionReduce_SessionReduceFnClient) Recv() (*v1.SessionReduceResponse, error) { + m.ctrl.T.Helper() + ret := m.ctrl.Call(m, "Recv") + ret0, _ := ret[0].(*v1.SessionReduceResponse) + ret1, _ := ret[1].(error) + return ret0, ret1 +} + +// Recv indicates an expected call of Recv. 
+func (mr *MockSessionReduce_SessionReduceFnClientMockRecorder) Recv() *gomock.Call { + mr.mock.ctrl.T.Helper() + return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "Recv", reflect.TypeOf((*MockSessionReduce_SessionReduceFnClient)(nil).Recv)) +} + +// RecvMsg mocks base method. +func (m *MockSessionReduce_SessionReduceFnClient) RecvMsg(arg0 interface{}) error { + m.ctrl.T.Helper() + ret := m.ctrl.Call(m, "RecvMsg", arg0) + ret0, _ := ret[0].(error) + return ret0 +} + +// RecvMsg indicates an expected call of RecvMsg. +func (mr *MockSessionReduce_SessionReduceFnClientMockRecorder) RecvMsg(arg0 interface{}) *gomock.Call { + mr.mock.ctrl.T.Helper() + return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "RecvMsg", reflect.TypeOf((*MockSessionReduce_SessionReduceFnClient)(nil).RecvMsg), arg0) +} + +// Send mocks base method. +func (m *MockSessionReduce_SessionReduceFnClient) Send(arg0 *v1.SessionReduceRequest) error { + m.ctrl.T.Helper() + ret := m.ctrl.Call(m, "Send", arg0) + ret0, _ := ret[0].(error) + return ret0 +} + +// Send indicates an expected call of Send. +func (mr *MockSessionReduce_SessionReduceFnClientMockRecorder) Send(arg0 interface{}) *gomock.Call { + mr.mock.ctrl.T.Helper() + return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "Send", reflect.TypeOf((*MockSessionReduce_SessionReduceFnClient)(nil).Send), arg0) +} + +// SendMsg mocks base method. +func (m *MockSessionReduce_SessionReduceFnClient) SendMsg(arg0 interface{}) error { + m.ctrl.T.Helper() + ret := m.ctrl.Call(m, "SendMsg", arg0) + ret0, _ := ret[0].(error) + return ret0 +} + +// SendMsg indicates an expected call of SendMsg. +func (mr *MockSessionReduce_SessionReduceFnClientMockRecorder) SendMsg(arg0 interface{}) *gomock.Call { + mr.mock.ctrl.T.Helper() + return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "SendMsg", reflect.TypeOf((*MockSessionReduce_SessionReduceFnClient)(nil).SendMsg), arg0) +} + +// Trailer mocks base method. 
+func (m *MockSessionReduce_SessionReduceFnClient) Trailer() metadata.MD { + m.ctrl.T.Helper() + ret := m.ctrl.Call(m, "Trailer") + ret0, _ := ret[0].(metadata.MD) + return ret0 +} + +// Trailer indicates an expected call of Trailer. +func (mr *MockSessionReduce_SessionReduceFnClientMockRecorder) Trailer() *gomock.Call { + mr.mock.ctrl.T.Helper() + return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "Trailer", reflect.TypeOf((*MockSessionReduce_SessionReduceFnClient)(nil).Trailer)) +} diff --git a/pkg/apis/proto/source/v1/source.proto b/pkg/apis/proto/source/v1/source.proto new file mode 100644 index 00000000..a0778dfe --- /dev/null +++ b/pkg/apis/proto/source/v1/source.proto @@ -0,0 +1,151 @@ +syntax = "proto3"; + +option go_package = "github.com/numaproj/numaflow-go/pkg/apis/proto/source/v1"; + +import "google/protobuf/timestamp.proto"; +import "google/protobuf/empty.proto"; + +package source.v1; + +service Source { + // Read returns a stream of datum responses. + // The size of the returned ReadResponse is less than or equal to the num_records specified in ReadRequest. + // If the request timeout is reached on server side, the returned ReadResponse will contain all the datum that have been read (which could be an empty list). + rpc ReadFn(ReadRequest) returns (stream ReadResponse); + + // AckFn acknowledges a list of datum offsets. + // When AckFn is called, it implicitly indicates that the datum stream has been processed by the source vertex. + // The caller (numa) expects the AckFn to be successful, and it does not expect any errors. + // If there are some irrecoverable errors when the callee (UDSource) is processing the AckFn request, + // then it is best to crash because there are no other retry mechanisms possible. + rpc AckFn(AckRequest) returns (AckResponse); + + // PendingFn returns the number of pending records at the user defined source. 
+ rpc PendingFn(google.protobuf.Empty) returns (PendingResponse); + + // PartitionsFn returns the list of partitions for the user defined source. + rpc PartitionsFn(google.protobuf.Empty) returns (PartitionsResponse); + + // IsReady is the heartbeat endpoint for user defined source gRPC. + rpc IsReady(google.protobuf.Empty) returns (ReadyResponse); +} + +/* + * ReadRequest is the request for reading datum stream from user defined source. + */ +message ReadRequest { + message Request { + // Required field indicating the number of records to read. + uint64 num_records = 1; + // Required field indicating the request timeout in milliseconds. + // uint32 can represent 2^32 milliseconds, which is about 49 days. + // We don't use uint64 because time.Duration takes int64 as nano seconds. Using uint64 for milli will cause overflow. + uint32 timeout_in_ms = 2; + } + // Required field indicating the request. + Request request = 1; +} + +/* + * ReadResponse is the response for reading datum stream from user defined source. + */ +message ReadResponse { + message Result { + // Required field holding the payload of the datum. + bytes payload = 1; + // Required field indicating the offset information of the datum. + Offset offset = 2; + // Required field representing the time associated with each datum. It is used for watermarking. + google.protobuf.Timestamp event_time = 3; + // Optional list of keys associated with the datum. + // Key is the "key" attribute in (key,value) as in the map-reduce paradigm. + // We add this optional field to support the use case where the user defined source can provide keys for the datum. + // e.g. Kafka and Redis Stream message usually include information about the keys. + repeated string keys = 4; + } + // Required field holding the result. + Result result = 1; +} + +/* + * AckRequest is the request for acknowledging datum. + * It takes a list of offsets to be acknowledged. 
+ */ +message AckRequest { + message Request { + // Required field holding a list of offsets to be acknowledged. + // The offsets must be strictly corresponding to the previously read batch, + // meaning the offsets must be in the same order as the datum responses in the ReadResponse. + // By enforcing ordering, we can save deserialization effort on the server side, assuming the server keeps a local copy of the raw/un-serialized offsets. + repeated Offset offsets = 1; + } + // Required field holding the request. The list will be ordered and will have the same order as the original Read response. + Request request = 1; +} + +/* + * AckResponse is the response for acknowledging datum. It contains one empty field confirming + * the batch of offsets that have been successfully acknowledged. The contract between client and server + * is that the server will only return the AckResponse if the ack request is successful. + * If the server hangs during the ack request, the client can decide to timeout and error out the data forwarder. + * The reason why we define such contract is that we always expect the server to be able to process the ack request. + * Client is expected to send the AckRequest to the server with offsets that are strictly + * corresponding to the previously read batch. If the client sends the AckRequest with offsets that are not, + * it is considered as a client error and the server will not return the AckResponse. + */ +message AckResponse { + message Result { + // Required field indicating the ack request is successful. + google.protobuf.Empty success = 1; + } + // Required field holding the result. + Result result = 1; +} + +/* + * ReadyResponse is the health check result for user defined source. + */ +message ReadyResponse { + // Required field holding the health check result. + bool ready = 1; +} + +/* + * PendingResponse is the response for the pending request. 
+ */ +message PendingResponse { + message Result { + // Required field holding the number of pending records at the user defined source. + // A negative count indicates that the pending information is not available. + int64 count = 1; + } + // Required field holding the result. + Result result = 1; +} + +/* + * PartitionsResponse is the response for the partitions request. + */ +message PartitionsResponse { + message Result { + // Required field holding the list of partitions. + repeated int32 partitions = 1; + } + // Required field holding the result. + Result result = 1; +} + +/* + * Offset is the offset of the datum. + */ +message Offset { + // offset is the offset of the datum. This field is required. + // We define Offset as a byte array because different input data sources can have different representations for Offset. + // The only way to generalize it is to define it as a byte array, + // so that the UDSource can deserialize the offset using its own interpretation logic. + bytes offset = 1; + // Optional partition_id indicates which partition of the source the datum belongs to. + // It is useful for sources that have multiple partitions, e.g. Kafka. + // If the partition_id is not specified, it is assumed that the source has a single partition. + int32 partition_id = 2; +} \ No newline at end of file diff --git a/pkg/apis/proto/source/v1/sourcemock/sourcemock.go b/pkg/apis/proto/source/v1/sourcemock/sourcemock.go index 2975b862..6d23e3d1 100644 --- a/pkg/apis/proto/source/v1/sourcemock/sourcemock.go +++ b/pkg/apis/proto/source/v1/sourcemock/sourcemock.go @@ -78,6 +78,26 @@ func (mr *MockSourceClientMockRecorder) IsReady(arg0, arg1 interface{}, arg2 ... return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "IsReady", reflect.TypeOf((*MockSourceClient)(nil).IsReady), varargs...) } +// PartitionsFn mocks base method. 
+func (m *MockSourceClient) PartitionsFn(arg0 context.Context, arg1 *emptypb.Empty, arg2 ...grpc.CallOption) (*v1.PartitionsResponse, error) { + m.ctrl.T.Helper() + varargs := []interface{}{arg0, arg1} + for _, a := range arg2 { + varargs = append(varargs, a) + } + ret := m.ctrl.Call(m, "PartitionsFn", varargs...) + ret0, _ := ret[0].(*v1.PartitionsResponse) + ret1, _ := ret[1].(error) + return ret0, ret1 +} + +// PartitionsFn indicates an expected call of PartitionsFn. +func (mr *MockSourceClientMockRecorder) PartitionsFn(arg0, arg1 interface{}, arg2 ...interface{}) *gomock.Call { + mr.mock.ctrl.T.Helper() + varargs := append([]interface{}{arg0, arg1}, arg2...) + return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "PartitionsFn", reflect.TypeOf((*MockSourceClient)(nil).PartitionsFn), varargs...) +} + // PendingFn mocks base method. func (m *MockSourceClient) PendingFn(arg0 context.Context, arg1 *emptypb.Empty, arg2 ...grpc.CallOption) (*v1.PendingResponse, error) { m.ctrl.T.Helper() diff --git a/pkg/mapper/examples/even_odd/Makefile b/pkg/mapper/examples/even_odd/Makefile index 9a7a364c..02662628 100644 --- a/pkg/mapper/examples/even_odd/Makefile +++ b/pkg/mapper/examples/even_odd/Makefile @@ -4,7 +4,7 @@ build: .PHONY: image image: build - docker build -t "quay.io/numaio/numaflow-go/map-even-odd:v0.5.0" --target even-odd . + docker build -t "quay.io/numaio/numaflow-go/map-even-odd:v0.6.0" --target even-odd . 
clean: -rm -rf ./dist diff --git a/pkg/mapper/examples/even_odd/go.mod b/pkg/mapper/examples/even_odd/go.mod index 54f770a7..4380978e 100644 --- a/pkg/mapper/examples/even_odd/go.mod +++ b/pkg/mapper/examples/even_odd/go.mod @@ -2,7 +2,7 @@ module even_odd go 1.20 -require github.com/numaproj/numaflow-go v0.4.6-0.20230828035951-6f79b632ecfe +require github.com/numaproj/numaflow-go v0.6.0 require ( github.com/golang/protobuf v1.5.3 // indirect diff --git a/pkg/mapper/examples/even_odd/go.sum b/pkg/mapper/examples/even_odd/go.sum index 80fd3796..8b1ed758 100644 --- a/pkg/mapper/examples/even_odd/go.sum +++ b/pkg/mapper/examples/even_odd/go.sum @@ -4,8 +4,8 @@ github.com/golang/protobuf v1.5.3 h1:KhyjKVUg7Usr/dYsdSqoFveMYd5ko72D+zANwlG1mmg github.com/golang/protobuf v1.5.3/go.mod h1:XVQd3VNwM+JqD3oG2Ue2ip4fOMUkwXdXDdiuN0vRsmY= github.com/google/go-cmp v0.5.5/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= github.com/google/go-cmp v0.5.9 h1:O2Tfq5qg4qc4AmwVlvv0oLiVAGB7enBSJ2x2DqQFi38= -github.com/numaproj/numaflow-go v0.4.6-0.20230828035951-6f79b632ecfe h1:nK/BGffgwQ4L9pyllwzSZttPxMf+OOqK3DOP97KZdRk= -github.com/numaproj/numaflow-go v0.4.6-0.20230828035951-6f79b632ecfe/go.mod h1:zcJq1YAA/jnxCQLW7EFK4+HXWCd2QtW4tMOvRjHFa2g= +github.com/numaproj/numaflow-go v0.6.0 h1:gqTX1u1pFJJhX/3l3zYM8aLqRSHEainYrgBIollL0js= +github.com/numaproj/numaflow-go v0.6.0/go.mod h1:5zwvvREIbqaCPCKsNE1MVjVToD0kvkCh2Z90Izlhw5U= github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= github.com/stretchr/testify v1.8.1 h1:w7B6lhMri9wdJUVmEZPGGhZzrYTPvgJArz7wNPgYKsk= golang.org/x/net v0.9.0 h1:aWJ/m6xSmxWBx+V0XRHTlrYrPG56jKsLdTFmsSsCzOM= diff --git a/pkg/mapper/examples/flatmap/Makefile b/pkg/mapper/examples/flatmap/Makefile index 53adac3e..5c8605b1 100644 --- a/pkg/mapper/examples/flatmap/Makefile +++ b/pkg/mapper/examples/flatmap/Makefile @@ -4,7 +4,7 @@ build: .PHONY: image image: build - docker build -t "quay.io/numaio/numaflow-go/map-flatmap:v0.5.0" 
--target flatmap . + docker build -t "quay.io/numaio/numaflow-go/map-flatmap:v0.6.0" --target flatmap . clean: -rm -rf ./dist diff --git a/pkg/mapper/examples/flatmap/go.mod b/pkg/mapper/examples/flatmap/go.mod index 54f770a7..f61e5312 100644 --- a/pkg/mapper/examples/flatmap/go.mod +++ b/pkg/mapper/examples/flatmap/go.mod @@ -1,8 +1,8 @@ -module even_odd +module flatmap go 1.20 -require github.com/numaproj/numaflow-go v0.4.6-0.20230828035951-6f79b632ecfe +require github.com/numaproj/numaflow-go v0.6.0 require ( github.com/golang/protobuf v1.5.3 // indirect diff --git a/pkg/mapper/examples/flatmap/go.sum b/pkg/mapper/examples/flatmap/go.sum index 80fd3796..8b1ed758 100644 --- a/pkg/mapper/examples/flatmap/go.sum +++ b/pkg/mapper/examples/flatmap/go.sum @@ -4,8 +4,8 @@ github.com/golang/protobuf v1.5.3 h1:KhyjKVUg7Usr/dYsdSqoFveMYd5ko72D+zANwlG1mmg github.com/golang/protobuf v1.5.3/go.mod h1:XVQd3VNwM+JqD3oG2Ue2ip4fOMUkwXdXDdiuN0vRsmY= github.com/google/go-cmp v0.5.5/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= github.com/google/go-cmp v0.5.9 h1:O2Tfq5qg4qc4AmwVlvv0oLiVAGB7enBSJ2x2DqQFi38= -github.com/numaproj/numaflow-go v0.4.6-0.20230828035951-6f79b632ecfe h1:nK/BGffgwQ4L9pyllwzSZttPxMf+OOqK3DOP97KZdRk= -github.com/numaproj/numaflow-go v0.4.6-0.20230828035951-6f79b632ecfe/go.mod h1:zcJq1YAA/jnxCQLW7EFK4+HXWCd2QtW4tMOvRjHFa2g= +github.com/numaproj/numaflow-go v0.6.0 h1:gqTX1u1pFJJhX/3l3zYM8aLqRSHEainYrgBIollL0js= +github.com/numaproj/numaflow-go v0.6.0/go.mod h1:5zwvvREIbqaCPCKsNE1MVjVToD0kvkCh2Z90Izlhw5U= github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= github.com/stretchr/testify v1.8.1 h1:w7B6lhMri9wdJUVmEZPGGhZzrYTPvgJArz7wNPgYKsk= golang.org/x/net v0.9.0 h1:aWJ/m6xSmxWBx+V0XRHTlrYrPG56jKsLdTFmsSsCzOM= diff --git a/pkg/mapper/examples/forward_message/Makefile b/pkg/mapper/examples/forward_message/Makefile index 148b0b20..d9646043 100644 --- a/pkg/mapper/examples/forward_message/Makefile +++ 
b/pkg/mapper/examples/forward_message/Makefile @@ -4,7 +4,7 @@ build: .PHONY: image image: build - docker build -t "quay.io/numaio/numaflow-go/map-forward-message:v0.5.0" --target forward-message . + docker build -t "quay.io/numaio/numaflow-go/map-forward-message:v0.6.0" --target forward-message . clean: -rm -rf ./dist diff --git a/pkg/mapper/examples/forward_message/go.mod b/pkg/mapper/examples/forward_message/go.mod index 54f770a7..4380978e 100644 --- a/pkg/mapper/examples/forward_message/go.mod +++ b/pkg/mapper/examples/forward_message/go.mod @@ -2,7 +2,7 @@ module even_odd go 1.20 -require github.com/numaproj/numaflow-go v0.4.6-0.20230828035951-6f79b632ecfe +require github.com/numaproj/numaflow-go v0.6.0 require ( github.com/golang/protobuf v1.5.3 // indirect diff --git a/pkg/mapper/examples/forward_message/go.sum b/pkg/mapper/examples/forward_message/go.sum index 80fd3796..8b1ed758 100644 --- a/pkg/mapper/examples/forward_message/go.sum +++ b/pkg/mapper/examples/forward_message/go.sum @@ -4,8 +4,8 @@ github.com/golang/protobuf v1.5.3 h1:KhyjKVUg7Usr/dYsdSqoFveMYd5ko72D+zANwlG1mmg github.com/golang/protobuf v1.5.3/go.mod h1:XVQd3VNwM+JqD3oG2Ue2ip4fOMUkwXdXDdiuN0vRsmY= github.com/google/go-cmp v0.5.5/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= github.com/google/go-cmp v0.5.9 h1:O2Tfq5qg4qc4AmwVlvv0oLiVAGB7enBSJ2x2DqQFi38= -github.com/numaproj/numaflow-go v0.4.6-0.20230828035951-6f79b632ecfe h1:nK/BGffgwQ4L9pyllwzSZttPxMf+OOqK3DOP97KZdRk= -github.com/numaproj/numaflow-go v0.4.6-0.20230828035951-6f79b632ecfe/go.mod h1:zcJq1YAA/jnxCQLW7EFK4+HXWCd2QtW4tMOvRjHFa2g= +github.com/numaproj/numaflow-go v0.6.0 h1:gqTX1u1pFJJhX/3l3zYM8aLqRSHEainYrgBIollL0js= +github.com/numaproj/numaflow-go v0.6.0/go.mod h1:5zwvvREIbqaCPCKsNE1MVjVToD0kvkCh2Z90Izlhw5U= github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= github.com/stretchr/testify v1.8.1 h1:w7B6lhMri9wdJUVmEZPGGhZzrYTPvgJArz7wNPgYKsk= golang.org/x/net v0.9.0 
h1:aWJ/m6xSmxWBx+V0XRHTlrYrPG56jKsLdTFmsSsCzOM= diff --git a/pkg/mapper/examples/retry/Makefile b/pkg/mapper/examples/retry/Makefile index 8cbe22c0..c75d4910 100644 --- a/pkg/mapper/examples/retry/Makefile +++ b/pkg/mapper/examples/retry/Makefile @@ -4,7 +4,7 @@ build: .PHONY: image image: build - docker build -t "quay.io/numaio/numaflow-go/map-retry:v0.5.0" --target retry . + docker build -t "quay.io/numaio/numaflow-go/map-retry:v0.6.0" --target retry . clean: -rm -rf ./dist diff --git a/pkg/mapper/examples/retry/go.mod b/pkg/mapper/examples/retry/go.mod index 54f770a7..38ebb5c6 100644 --- a/pkg/mapper/examples/retry/go.mod +++ b/pkg/mapper/examples/retry/go.mod @@ -1,8 +1,8 @@ -module even_odd +module retry go 1.20 -require github.com/numaproj/numaflow-go v0.4.6-0.20230828035951-6f79b632ecfe +require github.com/numaproj/numaflow-go v0.6.0 require ( github.com/golang/protobuf v1.5.3 // indirect diff --git a/pkg/mapper/examples/retry/go.sum b/pkg/mapper/examples/retry/go.sum index 80fd3796..8b1ed758 100644 --- a/pkg/mapper/examples/retry/go.sum +++ b/pkg/mapper/examples/retry/go.sum @@ -4,8 +4,8 @@ github.com/golang/protobuf v1.5.3 h1:KhyjKVUg7Usr/dYsdSqoFveMYd5ko72D+zANwlG1mmg github.com/golang/protobuf v1.5.3/go.mod h1:XVQd3VNwM+JqD3oG2Ue2ip4fOMUkwXdXDdiuN0vRsmY= github.com/google/go-cmp v0.5.5/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= github.com/google/go-cmp v0.5.9 h1:O2Tfq5qg4qc4AmwVlvv0oLiVAGB7enBSJ2x2DqQFi38= -github.com/numaproj/numaflow-go v0.4.6-0.20230828035951-6f79b632ecfe h1:nK/BGffgwQ4L9pyllwzSZttPxMf+OOqK3DOP97KZdRk= -github.com/numaproj/numaflow-go v0.4.6-0.20230828035951-6f79b632ecfe/go.mod h1:zcJq1YAA/jnxCQLW7EFK4+HXWCd2QtW4tMOvRjHFa2g= +github.com/numaproj/numaflow-go v0.6.0 h1:gqTX1u1pFJJhX/3l3zYM8aLqRSHEainYrgBIollL0js= +github.com/numaproj/numaflow-go v0.6.0/go.mod h1:5zwvvREIbqaCPCKsNE1MVjVToD0kvkCh2Z90Izlhw5U= github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= github.com/stretchr/testify v1.8.1 
h1:w7B6lhMri9wdJUVmEZPGGhZzrYTPvgJArz7wNPgYKsk= golang.org/x/net v0.9.0 h1:aWJ/m6xSmxWBx+V0XRHTlrYrPG56jKsLdTFmsSsCzOM= diff --git a/pkg/mapper/examples/tickgen/Makefile b/pkg/mapper/examples/tickgen/Makefile index 57498527..dcaf1b85 100644 --- a/pkg/mapper/examples/tickgen/Makefile +++ b/pkg/mapper/examples/tickgen/Makefile @@ -4,7 +4,7 @@ build: .PHONY: image image: build - docker build -t "quay.io/numaio/numaflow-go/map-tickgen:v1" --target tickgen . + docker build -t "quay.io/numaio/numaflow-go/map-tickgen:v0.6.0" --target tickgen . clean: -rm -rf ./dist diff --git a/pkg/mapper/examples/tickgen/go.mod b/pkg/mapper/examples/tickgen/go.mod index 54f770a7..c9208575 100644 --- a/pkg/mapper/examples/tickgen/go.mod +++ b/pkg/mapper/examples/tickgen/go.mod @@ -1,8 +1,8 @@ -module even_odd +module tickgen go 1.20 -require github.com/numaproj/numaflow-go v0.4.6-0.20230828035951-6f79b632ecfe +require github.com/numaproj/numaflow-go v0.6.0 require ( github.com/golang/protobuf v1.5.3 // indirect diff --git a/pkg/mapper/examples/tickgen/go.sum b/pkg/mapper/examples/tickgen/go.sum index 80fd3796..8b1ed758 100644 --- a/pkg/mapper/examples/tickgen/go.sum +++ b/pkg/mapper/examples/tickgen/go.sum @@ -4,8 +4,8 @@ github.com/golang/protobuf v1.5.3 h1:KhyjKVUg7Usr/dYsdSqoFveMYd5ko72D+zANwlG1mmg github.com/golang/protobuf v1.5.3/go.mod h1:XVQd3VNwM+JqD3oG2Ue2ip4fOMUkwXdXDdiuN0vRsmY= github.com/google/go-cmp v0.5.5/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= github.com/google/go-cmp v0.5.9 h1:O2Tfq5qg4qc4AmwVlvv0oLiVAGB7enBSJ2x2DqQFi38= -github.com/numaproj/numaflow-go v0.4.6-0.20230828035951-6f79b632ecfe h1:nK/BGffgwQ4L9pyllwzSZttPxMf+OOqK3DOP97KZdRk= -github.com/numaproj/numaflow-go v0.4.6-0.20230828035951-6f79b632ecfe/go.mod h1:zcJq1YAA/jnxCQLW7EFK4+HXWCd2QtW4tMOvRjHFa2g= +github.com/numaproj/numaflow-go v0.6.0 h1:gqTX1u1pFJJhX/3l3zYM8aLqRSHEainYrgBIollL0js= +github.com/numaproj/numaflow-go v0.6.0/go.mod h1:5zwvvREIbqaCPCKsNE1MVjVToD0kvkCh2Z90Izlhw5U= 
github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= github.com/stretchr/testify v1.8.1 h1:w7B6lhMri9wdJUVmEZPGGhZzrYTPvgJArz7wNPgYKsk= golang.org/x/net v0.9.0 h1:aWJ/m6xSmxWBx+V0XRHTlrYrPG56jKsLdTFmsSsCzOM= diff --git a/pkg/mapper/options.go b/pkg/mapper/options.go index d1f3b483..45683be5 100644 --- a/pkg/mapper/options.go +++ b/pkg/mapper/options.go @@ -13,7 +13,7 @@ type options struct { // Option is the interface to apply options. type Option func(*options) -func DefaultOptions() *options { +func defaultOptions() *options { return &options{ sockAddr: address, maxMessageSize: defaultMaxMessageSize, diff --git a/pkg/mapper/server.go b/pkg/mapper/server.go index 5a584df7..60399b24 100644 --- a/pkg/mapper/server.go +++ b/pkg/mapper/server.go @@ -20,7 +20,7 @@ type server struct { // NewServer creates a new map server. func NewServer(m Mapper, inputOptions ...Option) numaflow.Server { - opts := DefaultOptions() + opts := defaultOptions() for _, inputOption := range inputOptions { inputOption(opts) } diff --git a/pkg/mapstreamer/examples/flatmap_stream/Makefile b/pkg/mapstreamer/examples/flatmap_stream/Makefile index 96314724..28ca07d4 100644 --- a/pkg/mapstreamer/examples/flatmap_stream/Makefile +++ b/pkg/mapstreamer/examples/flatmap_stream/Makefile @@ -4,7 +4,7 @@ build: .PHONY: image image: build - docker build -t "quay.io/numaio/numaflow-go/map-flatmap-stream:v0.5.0" --target flatmap_stream . + docker build -t "quay.io/numaio/numaflow-go/map-flatmap-stream:v0.6.0" --target flatmap_stream . 
clean: -rm -rf ./dist diff --git a/pkg/mapstreamer/examples/flatmap_stream/go.mod b/pkg/mapstreamer/examples/flatmap_stream/go.mod index 54f770a7..3e8ab932 100644 --- a/pkg/mapstreamer/examples/flatmap_stream/go.mod +++ b/pkg/mapstreamer/examples/flatmap_stream/go.mod @@ -1,8 +1,8 @@ -module even_odd +module flatmap_stream go 1.20 -require github.com/numaproj/numaflow-go v0.4.6-0.20230828035951-6f79b632ecfe +require github.com/numaproj/numaflow-go v0.6.0 require ( github.com/golang/protobuf v1.5.3 // indirect diff --git a/pkg/mapstreamer/examples/flatmap_stream/go.sum b/pkg/mapstreamer/examples/flatmap_stream/go.sum index 80fd3796..8b1ed758 100644 --- a/pkg/mapstreamer/examples/flatmap_stream/go.sum +++ b/pkg/mapstreamer/examples/flatmap_stream/go.sum @@ -4,8 +4,8 @@ github.com/golang/protobuf v1.5.3 h1:KhyjKVUg7Usr/dYsdSqoFveMYd5ko72D+zANwlG1mmg github.com/golang/protobuf v1.5.3/go.mod h1:XVQd3VNwM+JqD3oG2Ue2ip4fOMUkwXdXDdiuN0vRsmY= github.com/google/go-cmp v0.5.5/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= github.com/google/go-cmp v0.5.9 h1:O2Tfq5qg4qc4AmwVlvv0oLiVAGB7enBSJ2x2DqQFi38= -github.com/numaproj/numaflow-go v0.4.6-0.20230828035951-6f79b632ecfe h1:nK/BGffgwQ4L9pyllwzSZttPxMf+OOqK3DOP97KZdRk= -github.com/numaproj/numaflow-go v0.4.6-0.20230828035951-6f79b632ecfe/go.mod h1:zcJq1YAA/jnxCQLW7EFK4+HXWCd2QtW4tMOvRjHFa2g= +github.com/numaproj/numaflow-go v0.6.0 h1:gqTX1u1pFJJhX/3l3zYM8aLqRSHEainYrgBIollL0js= +github.com/numaproj/numaflow-go v0.6.0/go.mod h1:5zwvvREIbqaCPCKsNE1MVjVToD0kvkCh2Z90Izlhw5U= github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= github.com/stretchr/testify v1.8.1 h1:w7B6lhMri9wdJUVmEZPGGhZzrYTPvgJArz7wNPgYKsk= golang.org/x/net v0.9.0 h1:aWJ/m6xSmxWBx+V0XRHTlrYrPG56jKsLdTFmsSsCzOM= diff --git a/pkg/mapstreamer/options.go b/pkg/mapstreamer/options.go index 5eb1e609..2b5c9290 100644 --- a/pkg/mapstreamer/options.go +++ b/pkg/mapstreamer/options.go @@ -13,7 +13,7 @@ type options struct { // Option 
is the interface to apply options. type Option func(*options) -func DefaultOptions() *options { +func defaultOptions() *options { return &options{ sockAddr: address, maxMessageSize: defaultMaxMessageSize, diff --git a/pkg/mapstreamer/server.go b/pkg/mapstreamer/server.go index bb8b946c..f080feaf 100644 --- a/pkg/mapstreamer/server.go +++ b/pkg/mapstreamer/server.go @@ -19,7 +19,7 @@ type server struct { // NewServer creates a new map streaming server. func NewServer(ms MapStreamer, inputOptions ...Option) numaflow.Server { - opts := DefaultOptions() + opts := defaultOptions() for _, inputOption := range inputOptions { inputOption(opts) } diff --git a/pkg/reducer/examples/counter/Makefile b/pkg/reducer/examples/counter/Makefile index c5696c14..2b9f3d04 100644 --- a/pkg/reducer/examples/counter/Makefile +++ b/pkg/reducer/examples/counter/Makefile @@ -4,7 +4,7 @@ build: .PHONY: image image: build - docker build -t "quay.io/numaio/numaflow-go/reduce-counter:v0.5.0" --target counter . + docker build -t "quay.io/numaio/numaflow-go/reduce-counter:v0.6.0" --target counter . 
clean: -rm -rf ./dist diff --git a/pkg/reducer/examples/counter/go.mod b/pkg/reducer/examples/counter/go.mod index 8b968e1e..21d79eb1 100644 --- a/pkg/reducer/examples/counter/go.mod +++ b/pkg/reducer/examples/counter/go.mod @@ -1,8 +1,8 @@ -module even_odd +module counter go 1.20 -require github.com/numaproj/numaflow-go v0.4.6-0.20230828035951-6f79b632ecfe +require github.com/numaproj/numaflow-go v0.6.1-0.20231219080635-d096c415a42f require ( github.com/golang/protobuf v1.5.3 // indirect diff --git a/pkg/reducer/examples/counter/go.sum b/pkg/reducer/examples/counter/go.sum index 18c03144..e4aac392 100644 --- a/pkg/reducer/examples/counter/go.sum +++ b/pkg/reducer/examples/counter/go.sum @@ -4,8 +4,10 @@ github.com/golang/protobuf v1.5.3 h1:KhyjKVUg7Usr/dYsdSqoFveMYd5ko72D+zANwlG1mmg github.com/golang/protobuf v1.5.3/go.mod h1:XVQd3VNwM+JqD3oG2Ue2ip4fOMUkwXdXDdiuN0vRsmY= github.com/google/go-cmp v0.5.5/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= github.com/google/go-cmp v0.5.9 h1:O2Tfq5qg4qc4AmwVlvv0oLiVAGB7enBSJ2x2DqQFi38= -github.com/numaproj/numaflow-go v0.4.6-0.20230828035951-6f79b632ecfe h1:nK/BGffgwQ4L9pyllwzSZttPxMf+OOqK3DOP97KZdRk= -github.com/numaproj/numaflow-go v0.4.6-0.20230828035951-6f79b632ecfe/go.mod h1:zcJq1YAA/jnxCQLW7EFK4+HXWCd2QtW4tMOvRjHFa2g= +github.com/numaproj/numaflow-go v0.6.0 h1:gqTX1u1pFJJhX/3l3zYM8aLqRSHEainYrgBIollL0js= +github.com/numaproj/numaflow-go v0.6.0/go.mod h1:5zwvvREIbqaCPCKsNE1MVjVToD0kvkCh2Z90Izlhw5U= +github.com/numaproj/numaflow-go v0.6.1-0.20231219080635-d096c415a42f h1:J43ekeRVzE6WGgkWl5oEQ+c4NT1i4VikMkygu4AeUYE= +github.com/numaproj/numaflow-go v0.6.1-0.20231219080635-d096c415a42f/go.mod h1:WoMt31+h3up202zTRI8c/qe42B8UbvwLe2mJH0MAlhI= github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= github.com/stretchr/testify v1.8.1 h1:w7B6lhMri9wdJUVmEZPGGhZzrYTPvgJArz7wNPgYKsk= golang.org/x/net v0.9.0 h1:aWJ/m6xSmxWBx+V0XRHTlrYrPG56jKsLdTFmsSsCzOM= diff --git 
a/pkg/reducer/examples/counter/main.go b/pkg/reducer/examples/counter/main.go index a3868732..4d3cf790 100644 --- a/pkg/reducer/examples/counter/main.go +++ b/pkg/reducer/examples/counter/main.go @@ -7,12 +7,12 @@ import ( "github.com/numaproj/numaflow-go/pkg/reducer" ) -func reduceCounter(_ context.Context, keys []string, reduceCh <-chan reducer.Datum, md reducer.Metadata) reducer.Messages { +func reduceCounter(_ context.Context, keys []string, inputCh <-chan reducer.Datum, md reducer.Metadata) reducer.Messages { // count the incoming events var resultKeys = keys var resultVal []byte var counter = 0 - for range reduceCh { + for range inputCh { counter++ } resultVal = []byte(strconv.Itoa(counter)) @@ -20,5 +20,5 @@ func reduceCounter(_ context.Context, keys []string, reduceCh <-chan reducer.Dat } func main() { - reducer.NewServer(reducer.ReducerFunc(reduceCounter)).Start(context.Background()) + reducer.NewServer(reducer.SimpleCreatorWithReduceFn(reduceCounter)).Start(context.Background()) } diff --git a/pkg/reducer/examples/sum/Makefile b/pkg/reducer/examples/sum/Makefile index dc21aa79..aecdc0dd 100644 --- a/pkg/reducer/examples/sum/Makefile +++ b/pkg/reducer/examples/sum/Makefile @@ -4,7 +4,7 @@ build: .PHONY: image image: build - docker build -t "quay.io/numaio/numaflow-go/reduce-sum:v0.5.0" --target sum . + docker build -t "quay.io/numaio/numaflow-go/reduce-sum:v0.6.0" --target sum . 
clean: -rm -rf ./dist diff --git a/pkg/reducer/examples/sum/go.mod b/pkg/reducer/examples/sum/go.mod index 8b968e1e..27de5f05 100644 --- a/pkg/reducer/examples/sum/go.mod +++ b/pkg/reducer/examples/sum/go.mod @@ -1,8 +1,8 @@ -module even_odd +module sum go 1.20 -require github.com/numaproj/numaflow-go v0.4.6-0.20230828035951-6f79b632ecfe +require github.com/numaproj/numaflow-go v0.6.1-0.20231219080635-d096c415a42f require ( github.com/golang/protobuf v1.5.3 // indirect diff --git a/pkg/reducer/examples/sum/go.sum b/pkg/reducer/examples/sum/go.sum index 18c03144..e4aac392 100644 --- a/pkg/reducer/examples/sum/go.sum +++ b/pkg/reducer/examples/sum/go.sum @@ -4,8 +4,10 @@ github.com/golang/protobuf v1.5.3 h1:KhyjKVUg7Usr/dYsdSqoFveMYd5ko72D+zANwlG1mmg github.com/golang/protobuf v1.5.3/go.mod h1:XVQd3VNwM+JqD3oG2Ue2ip4fOMUkwXdXDdiuN0vRsmY= github.com/google/go-cmp v0.5.5/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= github.com/google/go-cmp v0.5.9 h1:O2Tfq5qg4qc4AmwVlvv0oLiVAGB7enBSJ2x2DqQFi38= -github.com/numaproj/numaflow-go v0.4.6-0.20230828035951-6f79b632ecfe h1:nK/BGffgwQ4L9pyllwzSZttPxMf+OOqK3DOP97KZdRk= -github.com/numaproj/numaflow-go v0.4.6-0.20230828035951-6f79b632ecfe/go.mod h1:zcJq1YAA/jnxCQLW7EFK4+HXWCd2QtW4tMOvRjHFa2g= +github.com/numaproj/numaflow-go v0.6.0 h1:gqTX1u1pFJJhX/3l3zYM8aLqRSHEainYrgBIollL0js= +github.com/numaproj/numaflow-go v0.6.0/go.mod h1:5zwvvREIbqaCPCKsNE1MVjVToD0kvkCh2Z90Izlhw5U= +github.com/numaproj/numaflow-go v0.6.1-0.20231219080635-d096c415a42f h1:J43ekeRVzE6WGgkWl5oEQ+c4NT1i4VikMkygu4AeUYE= +github.com/numaproj/numaflow-go v0.6.1-0.20231219080635-d096c415a42f/go.mod h1:WoMt31+h3up202zTRI8c/qe42B8UbvwLe2mJH0MAlhI= github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= github.com/stretchr/testify v1.8.1 h1:w7B6lhMri9wdJUVmEZPGGhZzrYTPvgJArz7wNPgYKsk= golang.org/x/net v0.9.0 h1:aWJ/m6xSmxWBx+V0XRHTlrYrPG56jKsLdTFmsSsCzOM= diff --git a/pkg/reducer/examples/sum/main.go 
b/pkg/reducer/examples/sum/main.go index cd2dffa8..abdb482f 100644 --- a/pkg/reducer/examples/sum/main.go +++ b/pkg/reducer/examples/sum/main.go @@ -9,20 +9,30 @@ import ( "github.com/numaproj/numaflow-go/pkg/reducer" ) +// SumReducerCreator implements the reducer.ReducerCreator interface which creates a reducer +type SumReducerCreator struct { +} + +func (s *SumReducerCreator) Create() reducer.Reducer { + return &Sum{} +} + // Sum is a reducer that sum up the values for the given keys type Sum struct { + sum int } -func (s *Sum) Reduce(ctx context.Context, keys []string, reduceCh <-chan reducer.Datum, md reducer.Metadata) reducer.Messages { +func (s *Sum) Reduce(ctx context.Context, keys []string, inputCh <-chan reducer.Datum, md reducer.Metadata) reducer.Messages { // sum up values for the same keys intervalWindow := md.IntervalWindow() _ = intervalWindow var resultKeys = keys var resultVal []byte - var sum = 0 // sum up the values - for d := range reduceCh { + for d := range inputCh { val := d.Value() + + // event time and watermark can be fetched from the datum eventTime := d.EventTime() _ = eventTime watermark := d.Watermark() @@ -33,14 +43,14 @@ func (s *Sum) Reduce(ctx context.Context, keys []string, reduceCh <-chan reducer fmt.Printf("unable to convert the value to int: %v\n", err) continue } - sum += v + s.sum += v } - resultVal = []byte(strconv.Itoa(sum)) + resultVal = []byte(strconv.Itoa(s.sum)) return reducer.MessagesBuilder().Append(reducer.NewMessage(resultVal).WithKeys(resultKeys)) } func main() { - err := reducer.NewServer(&Sum{}).Start(context.Background()) + err := reducer.NewServer(&SumReducerCreator{}).Start(context.Background()) if err != nil { log.Panic("unable to start the server due to: ", err) } diff --git a/pkg/reducer/interface.go b/pkg/reducer/interface.go index 35b37bbb..e49d858a 100644 --- a/pkg/reducer/interface.go +++ b/pkg/reducer/interface.go @@ -25,13 +25,34 @@ type IntervalWindow interface { // Reducer is the interface of reduce 
function implementation. type Reducer interface { - Reduce(ctx context.Context, keys []string, reduceCh <-chan Datum, md Metadata) Messages + Reduce(ctx context.Context, keys []string, inputCh <-chan Datum, md Metadata) Messages } -// ReducerFunc is a utility type used to convert a Reduce function to a Reducer. -type ReducerFunc func(ctx context.Context, keys []string, reduceCh <-chan Datum, md Metadata) Messages +// ReducerCreator is the interface which is used to create a Reducer. +type ReducerCreator interface { + // Create creates a Reducer, will be invoked once for every keyed window. + Create() Reducer +} + +// simpleReducerCreator is an implementation of ReducerCreator, which creates a Reducer for the given function. +type simpleReducerCreator struct { + f func(context.Context, []string, <-chan Datum, Metadata) Messages +} + +// Create creates a Reducer for the given function. +func (s *simpleReducerCreator) Create() Reducer { + return reducerFn(s.f) +} + +// SimpleCreatorWithReduceFn creates a simple ReducerCreator for the given reduce function. +func SimpleCreatorWithReduceFn(f func(context.Context, []string, <-chan Datum, Metadata) Messages) ReducerCreator { + return &simpleReducerCreator{f: f} +} + +// reducerFn is a utility type used to convert a Reduce function to a Reducer. +type reducerFn func(ctx context.Context, keys []string, reduceCh <-chan Datum, md Metadata) Messages // Reduce implements the function of reduce function. 
-func (rf ReducerFunc) Reduce(ctx context.Context, keys []string, reduceCh <-chan Datum, md Metadata) Messages { +func (rf reducerFn) Reduce(ctx context.Context, keys []string, reduceCh <-chan Datum, md Metadata) Messages { return rf(ctx, keys, reduceCh, md) } diff --git a/pkg/reducer/message.go b/pkg/reducer/message.go index e7114887..80209972 100644 --- a/pkg/reducer/message.go +++ b/pkg/reducer/message.go @@ -6,7 +6,7 @@ var ( DROP = fmt.Sprintf("%U__DROP__", '\\') // U+005C__DROP__ ) -// Message is used to wrap the data return by reduce functions +// Message is used to wrap the data returned by the reduce function type Message struct { value []byte keys []string diff --git a/pkg/reducer/options.go b/pkg/reducer/options.go index e0919a9d..e015cdb8 100644 --- a/pkg/reducer/options.go +++ b/pkg/reducer/options.go @@ -13,7 +13,7 @@ type options struct { // Option is the interface to apply options. type Option func(*options) -func DefaultOptions() *options { +func defaultOptions() *options { return &options{ sockAddr: address, maxMessageSize: defaultMaxMessageSize, diff --git a/pkg/reducer/server.go b/pkg/reducer/server.go index 81421481..53989ebf 100644 --- a/pkg/reducer/server.go +++ b/pkg/reducer/server.go @@ -6,7 +6,7 @@ import ( "os/signal" "syscall" - "github.com/numaproj/numaflow-go/pkg" + numaflow "github.com/numaproj/numaflow-go/pkg" reducepb "github.com/numaproj/numaflow-go/pkg/apis/proto/reduce/v1" "github.com/numaproj/numaflow-go/pkg/shared" ) @@ -18,14 +18,14 @@ type server struct { } // NewServer creates a new reduce server.
-func NewServer(r Reducer, inputOptions ...Option) numaflow.Server { - opts := DefaultOptions() +func NewServer(r ReducerCreator, inputOptions ...Option) numaflow.Server { + opts := defaultOptions() for _, inputOption := range inputOptions { inputOption(opts) } s := new(server) s.svc = new(Service) - s.svc.Reducer = r + s.svc.reducerCreatorHandle = r s.opts = opts return s } diff --git a/pkg/reducer/server_test.go b/pkg/reducer/server_test.go index 511bebb8..e5a29b2f 100644 --- a/pkg/reducer/server_test.go +++ b/pkg/reducer/server_test.go @@ -21,17 +21,18 @@ func TestReduceServer_Start(t *testing.T) { _ = os.RemoveAll(serverInfoFile.Name()) }() - var reduceHandler = ReducerFunc(func(ctx context.Context, keys []string, rch <-chan Datum, md Metadata) Messages { + var rfn = func(ctx context.Context, keys []string, rch <-chan Datum, md Metadata) Messages { sum := 0 for val := range rch { msgVal, _ := strconv.Atoi(string(val.Value())) sum += msgVal } return MessagesBuilder().Append(NewMessage([]byte(strconv.Itoa(sum))).WithKeys([]string{keys[0] + "_test"})) - }) + } + // note: using actual uds connection ctx, cancel := context.WithTimeout(context.Background(), 6*time.Second) defer cancel() - err := NewServer(reduceHandler, WithSockAddr(socketFile.Name()), WithServerInfoFilePath(serverInfoFile.Name())).Start(ctx) + err := NewServer(SimpleCreatorWithReduceFn(rfn), WithSockAddr(socketFile.Name()), WithServerInfoFilePath(serverInfoFile.Name())).Start(ctx) assert.NoError(t, err) } diff --git a/pkg/reducer/service.go b/pkg/reducer/service.go index 76610ef6..1a5148a8 100644 --- a/pkg/reducer/service.go +++ b/pkg/reducer/service.go @@ -2,16 +2,10 @@ package reducer import ( "context" - "fmt" "io" - "strconv" - "strings" - "sync" - "time" "golang.org/x/sync/errgroup" "google.golang.org/grpc/codes" - grpcmd "google.golang.org/grpc/metadata" "google.golang.org/grpc/status" "google.golang.org/protobuf/types/known/emptypb" @@ -30,8 +24,7 @@ const ( // Service implements the proto 
gen server interface and contains the reduce operation handler. type Service struct { reducepb.UnimplementedReduceServer - - Reducer Reducer + reducerCreatorHandle ReducerCreator } // IsReady returns true to indicate the gRPC connection is ready. @@ -42,44 +35,23 @@ func (fs *Service) IsReady(context.Context, *emptypb.Empty) (*reducepb.ReadyResp // ReduceFn applies a reduce function to a request stream and returns a list of results. func (fs *Service) ReduceFn(stream reducepb.Reduce_ReduceFnServer) error { var ( - md Metadata - err error - startTime int64 - endTime int64 - ctx = stream.Context() - chanMap = make(map[string]chan Datum) - mu sync.RWMutex - g errgroup.Group + err error + ctx = stream.Context() + g errgroup.Group ) - grpcMD, ok := grpcmd.FromIncomingContext(ctx) - if !ok { - statusErr := status.Errorf(codes.InvalidArgument, "keys and window information are not passed in grpc metadata") - return statusErr - } - - // get window start and end time from grpc metadata - var st, et string - st, err = getValueFromMetadata(grpcMD, winStartTime) - if err != nil { - statusErr := status.Errorf(codes.InvalidArgument, err.Error()) - return statusErr - } - - et, err = getValueFromMetadata(grpcMD, winEndTime) - if err != nil { - statusErr := status.Errorf(codes.InvalidArgument, err.Error()) - return statusErr - } + taskManager := newReduceTaskManager(fs.reducerCreatorHandle) - startTime, _ = strconv.ParseInt(st, 10, 64) - endTime, _ = strconv.ParseInt(et, 10, 64) - - // create interval window interface using the start and end time - iw := NewIntervalWindow(time.UnixMilli(startTime), time.UnixMilli(endTime)) - - // create metadata using interval window interface - md = NewMetadata(iw) + // err group for the go routine which reads from the output channel and sends to the stream + g.Go(func() error { + for output := range taskManager.OutputChannel() { + sendErr := stream.Send(output) + if sendErr != nil { + return sendErr + } + } + return nil + }) // read messages from 
the stream and write the messages to corresponding channels // if the channel is not created, create the channel and invoke the reduceFn @@ -87,84 +59,43 @@ func (fs *Service) ReduceFn(stream reducepb.Reduce_ReduceFnServer) error { d, recvErr := stream.Recv() // if EOF, close all the channels if recvErr == io.EOF { - closeChannels(chanMap) + taskManager.CloseAll() break } if recvErr != nil { - closeChannels(chanMap) // the error here is returned by stream.Recv() // it's already a gRPC error return recvErr } - unifiedKey := strings.Join(d.GetKeys(), delimiter) - var hd = NewHandlerDatum(d.GetValue(), d.EventTime.AsTime(), d.Watermark.AsTime()) - ch, chok := chanMap[unifiedKey] - if !chok { - ch = make(chan Datum) - chanMap[unifiedKey] = ch - - func(k []string, ch chan Datum) { - g.Go(func() error { - // we stream the messages to the user by writing messages - // to channel and wait until we get the result and stream - // the result back to the client (numaflow). - messages := fs.Reducer.Reduce(ctx, k, ch, md) - datumList := buildDatumList(messages) - - // stream.Send() is not thread safe. - mu.Lock() - defer mu.Unlock() - sendErr := stream.Send(datumList) - if sendErr != nil { - // the error here is returned by stream.Send() - // it's already a gRPC error - return sendErr - } - return nil - }) - }(d.GetKeys(), ch) + // for Aligned windows, it's just an open or append operation; + // the close signal will be sent to all the reducers when the grpc + // input stream gets EOF.
+ switch d.Operation.Event { + case reducepb.ReduceRequest_WindowOperation_OPEN: + // create a new reduce task and start the reduce operation + err = taskManager.CreateTask(ctx, d) + if err != nil { + statusErr := status.Errorf(codes.Internal, err.Error()) + return statusErr + } + case reducepb.ReduceRequest_WindowOperation_APPEND: + // append the datum to the reduce task + err = taskManager.AppendToTask(ctx, d) + if err != nil { + statusErr := status.Errorf(codes.Internal, err.Error()) + return statusErr + } } - ch <- hd } - // wait until all the mapfn return - return g.Wait() -} - -func buildDatumList(messages Messages) *reducepb.ReduceResponse { - response := &reducepb.ReduceResponse{} - for _, msg := range messages { - response.Results = append(response.Results, &reducepb.ReduceResponse_Result{ - Keys: msg.Keys(), - Value: msg.Value(), - Tags: msg.Tags(), - }) - } - - return response -} - -func closeChannels(chanMap map[string]chan Datum) { - for _, ch := range chanMap { - close(ch) + taskManager.WaitAll() + // wait for the go routine which reads from the output channel and sends to the stream to return + err = g.Wait() + if err != nil { + statusErr := status.Errorf(codes.Internal, err.Error()) + return statusErr } -} -func getValueFromMetadata(md grpcmd.MD, k string) (string, error) { - var value string - - keyValue := md.Get(k) - - if len(keyValue) > 1 { - return value, fmt.Errorf("expected extactly one value for keys %s in metadata but got %d values, %s", k, len(keyValue), keyValue) - } else if len(keyValue) == 1 { - value = keyValue[0] - } else { - // the length equals zero is invalid for reduce - // since we are using a global keys, and start and end time - // cannot be empty - return value, fmt.Errorf("expected non empty value for keys %s in metadata but got an empty value", k) - } - return value, nil + return nil } diff --git a/pkg/reducer/service_test.go b/pkg/reducer/service_test.go index 8eb6e488..485cd3ba 100644 --- a/pkg/reducer/service_test.go +++ 
b/pkg/reducer/service_test.go @@ -55,192 +55,411 @@ func TestService_ReduceFn(t *testing.T) { tests := []struct { name string - handler Reducer + handler func(ctx context.Context, keys []string, rch <-chan Datum, md Metadata) Messages input []*reducepb.ReduceRequest - expected *reducepb.ReduceResponse + expected []*reducepb.ReduceResponse expectedErr bool }{ { name: "reduce_fn_forward_msg_same_keys", - handler: ReducerFunc(func(ctx context.Context, keys []string, rch <-chan Datum, md Metadata) Messages { + handler: func(ctx context.Context, keys []string, rch <-chan Datum, md Metadata) Messages { sum := 0 for val := range rch { msgVal, _ := strconv.Atoi(string(val.Value())) sum += msgVal } return MessagesBuilder().Append(NewMessage([]byte(strconv.Itoa(sum))).WithKeys([]string{keys[0] + "_test"})) - }), + }, input: []*reducepb.ReduceRequest{ { - Keys: []string{"client"}, - Value: []byte(strconv.Itoa(10)), - EventTime: timestamppb.New(time.Time{}), - Watermark: timestamppb.New(time.Time{}), + Payload: &reducepb.ReduceRequest_Payload{ + Keys: []string{"client"}, + Value: []byte(strconv.Itoa(10)), + EventTime: timestamppb.New(time.Time{}), + Watermark: timestamppb.New(time.Time{}), + }, + Operation: &reducepb.ReduceRequest_WindowOperation{ + Event: reducepb.ReduceRequest_WindowOperation_OPEN, + Windows: []*reducepb.Window{ + { + Start: timestamppb.New(time.UnixMilli(60000)), + End: timestamppb.New(time.UnixMilli(120000)), + Slot: "slot-0", + }, + }, + }, }, { - Keys: []string{"client"}, - Value: []byte(strconv.Itoa(20)), - EventTime: timestamppb.New(time.Time{}), - Watermark: timestamppb.New(time.Time{}), + Payload: &reducepb.ReduceRequest_Payload{ + Keys: []string{"client"}, + Value: []byte(strconv.Itoa(20)), + EventTime: timestamppb.New(time.Time{}), + Watermark: timestamppb.New(time.Time{}), + }, + Operation: &reducepb.ReduceRequest_WindowOperation{ + Event: reducepb.ReduceRequest_WindowOperation_APPEND, + Windows: []*reducepb.Window{ + { + Start: 
timestamppb.New(time.UnixMilli(60000)), + End: timestamppb.New(time.UnixMilli(120000)), + Slot: "slot-0", + }, + }, + }, }, { - Keys: []string{"client"}, - Value: []byte(strconv.Itoa(30)), - EventTime: timestamppb.New(time.Time{}), - Watermark: timestamppb.New(time.Time{}), + Payload: &reducepb.ReduceRequest_Payload{ + Keys: []string{"client"}, + Value: []byte(strconv.Itoa(30)), + EventTime: timestamppb.New(time.Time{}), + Watermark: timestamppb.New(time.Time{}), + }, + Operation: &reducepb.ReduceRequest_WindowOperation{ + Event: reducepb.ReduceRequest_WindowOperation_APPEND, + Windows: []*reducepb.Window{ + { + Start: timestamppb.New(time.UnixMilli(60000)), + End: timestamppb.New(time.UnixMilli(120000)), + Slot: "slot-0", + }, + }, + }, }, }, - expected: &reducepb.ReduceResponse{ - Results: []*reducepb.ReduceResponse_Result{ - { + expected: []*reducepb.ReduceResponse{ + { + Result: &reducepb.ReduceResponse_Result{ Keys: []string{"client_test"}, Value: []byte(strconv.Itoa(60)), }, + Window: &reducepb.Window{ + Start: timestamppb.New(time.UnixMilli(60000)), + End: timestamppb.New(time.UnixMilli(120000)), + Slot: "slot-0", + }, + EOF: false, }, }, expectedErr: false, }, { name: "reduce_fn_forward_msg_multiple_keys", - handler: ReducerFunc(func(ctx context.Context, keys []string, rch <-chan Datum, md Metadata) Messages { + handler: func(ctx context.Context, keys []string, rch <-chan Datum, md Metadata) Messages { sum := 0 for val := range rch { msgVal, _ := strconv.Atoi(string(val.Value())) sum += msgVal } return MessagesBuilder().Append(NewMessage([]byte(strconv.Itoa(sum))).WithKeys([]string{keys[0] + "_test"})) - }), + }, input: []*reducepb.ReduceRequest{ { - Keys: []string{"client1"}, - Value: []byte(strconv.Itoa(10)), - EventTime: timestamppb.New(time.Time{}), - Watermark: timestamppb.New(time.Time{}), + Payload: &reducepb.ReduceRequest_Payload{ + Keys: []string{"client1"}, + Value: []byte(strconv.Itoa(10)), + EventTime: timestamppb.New(time.Time{}), + Watermark: 
timestamppb.New(time.Time{}), + }, + Operation: &reducepb.ReduceRequest_WindowOperation{ + Event: reducepb.ReduceRequest_WindowOperation_OPEN, + Windows: []*reducepb.Window{ + { + Start: timestamppb.New(time.UnixMilli(60000)), + End: timestamppb.New(time.UnixMilli(120000)), + Slot: "slot-0", + }, + }, + }, }, { - Keys: []string{"client2"}, - Value: []byte(strconv.Itoa(20)), - EventTime: timestamppb.New(time.Time{}), - Watermark: timestamppb.New(time.Time{}), + Payload: &reducepb.ReduceRequest_Payload{ + Keys: []string{"client2"}, + Value: []byte(strconv.Itoa(20)), + EventTime: timestamppb.New(time.Time{}), + Watermark: timestamppb.New(time.Time{}), + }, + Operation: &reducepb.ReduceRequest_WindowOperation{ + Event: reducepb.ReduceRequest_WindowOperation_OPEN, + Windows: []*reducepb.Window{ + { + Start: timestamppb.New(time.UnixMilli(60000)), + End: timestamppb.New(time.UnixMilli(120000)), + Slot: "slot-0", + }, + }, + }, }, { - Keys: []string{"client3"}, - Value: []byte(strconv.Itoa(30)), - EventTime: timestamppb.New(time.Time{}), - Watermark: timestamppb.New(time.Time{}), + Payload: &reducepb.ReduceRequest_Payload{ + Keys: []string{"client3"}, + Value: []byte(strconv.Itoa(30)), + EventTime: timestamppb.New(time.Time{}), + Watermark: timestamppb.New(time.Time{}), + }, + Operation: &reducepb.ReduceRequest_WindowOperation{ + Event: reducepb.ReduceRequest_WindowOperation_APPEND, + Windows: []*reducepb.Window{ + { + Start: timestamppb.New(time.UnixMilli(60000)), + End: timestamppb.New(time.UnixMilli(120000)), + Slot: "slot-0", + }, + }, + }, }, { - Keys: []string{"client1"}, - Value: []byte(strconv.Itoa(10)), - EventTime: timestamppb.New(time.Time{}), - Watermark: timestamppb.New(time.Time{}), + Payload: &reducepb.ReduceRequest_Payload{ + Keys: []string{"client1"}, + Value: []byte(strconv.Itoa(10)), + EventTime: timestamppb.New(time.Time{}), + Watermark: timestamppb.New(time.Time{}), + }, + Operation: &reducepb.ReduceRequest_WindowOperation{ + Event: 
reducepb.ReduceRequest_WindowOperation_APPEND, + Windows: []*reducepb.Window{ + { + Start: timestamppb.New(time.UnixMilli(60000)), + End: timestamppb.New(time.UnixMilli(120000)), + Slot: "slot-0", + }, + }, + }, }, { - Keys: []string{"client2"}, - Value: []byte(strconv.Itoa(20)), - EventTime: timestamppb.New(time.Time{}), - Watermark: timestamppb.New(time.Time{}), + Payload: &reducepb.ReduceRequest_Payload{ + Keys: []string{"client2"}, + Value: []byte(strconv.Itoa(20)), + EventTime: timestamppb.New(time.Time{}), + Watermark: timestamppb.New(time.Time{}), + }, + Operation: &reducepb.ReduceRequest_WindowOperation{ + Event: reducepb.ReduceRequest_WindowOperation_APPEND, + Windows: []*reducepb.Window{ + { + Start: timestamppb.New(time.UnixMilli(60000)), + End: timestamppb.New(time.UnixMilli(120000)), + Slot: "slot-0", + }, + }, + }, }, { - Keys: []string{"client3"}, - Value: []byte(strconv.Itoa(30)), - EventTime: timestamppb.New(time.Time{}), - Watermark: timestamppb.New(time.Time{}), + Payload: &reducepb.ReduceRequest_Payload{ + Keys: []string{"client3"}, + Value: []byte(strconv.Itoa(30)), + EventTime: timestamppb.New(time.Time{}), + Watermark: timestamppb.New(time.Time{}), + }, + Operation: &reducepb.ReduceRequest_WindowOperation{ + Event: reducepb.ReduceRequest_WindowOperation_APPEND, + Windows: []*reducepb.Window{ + { + Start: timestamppb.New(time.UnixMilli(60000)), + End: timestamppb.New(time.UnixMilli(120000)), + Slot: "slot-0", + }, + }, + }, }, }, - expected: &reducepb.ReduceResponse{ - Results: []*reducepb.ReduceResponse_Result{ - { + expected: []*reducepb.ReduceResponse{ + { + Result: &reducepb.ReduceResponse_Result{ Keys: []string{"client1_test"}, Value: []byte(strconv.Itoa(20)), }, - { + Window: &reducepb.Window{ + Start: timestamppb.New(time.UnixMilli(60000)), + End: timestamppb.New(time.UnixMilli(120000)), + Slot: "slot-0", + }, + EOF: false, + }, + { + Result: &reducepb.ReduceResponse_Result{ Keys: []string{"client2_test"}, Value: 
[]byte(strconv.Itoa(40)), }, - { + Window: &reducepb.Window{ + Start: timestamppb.New(time.UnixMilli(60000)), + End: timestamppb.New(time.UnixMilli(120000)), + Slot: "slot-0", + }, + EOF: false, + }, + { + Result: &reducepb.ReduceResponse_Result{ Keys: []string{"client3_test"}, Value: []byte(strconv.Itoa(60)), }, + Window: &reducepb.Window{ + Start: timestamppb.New(time.UnixMilli(60000)), + End: timestamppb.New(time.UnixMilli(120000)), + Slot: "slot-0", + }, + EOF: false, }, }, expectedErr: false, }, { name: "reduce_fn_forward_msg_forward_to_all", - handler: ReducerFunc(func(ctx context.Context, keys []string, rch <-chan Datum, md Metadata) Messages { + handler: func(ctx context.Context, keys []string, rch <-chan Datum, md Metadata) Messages { sum := 0 for val := range rch { msgVal, _ := strconv.Atoi(string(val.Value())) sum += msgVal } return MessagesBuilder().Append(NewMessage([]byte(strconv.Itoa(sum)))) - }), + }, input: []*reducepb.ReduceRequest{ { - Keys: []string{"client"}, - Value: []byte(strconv.Itoa(10)), - EventTime: timestamppb.New(time.Time{}), - Watermark: timestamppb.New(time.Time{}), + Payload: &reducepb.ReduceRequest_Payload{ + Keys: []string{"client"}, + Value: []byte(strconv.Itoa(10)), + EventTime: timestamppb.New(time.Time{}), + Watermark: timestamppb.New(time.Time{}), + }, + Operation: &reducepb.ReduceRequest_WindowOperation{ + Event: reducepb.ReduceRequest_WindowOperation_OPEN, + Windows: []*reducepb.Window{ + { + Start: timestamppb.New(time.UnixMilli(60000)), + End: timestamppb.New(time.UnixMilli(120000)), + Slot: "slot-0", + }, + }, + }, }, { - Keys: []string{"client"}, - Value: []byte(strconv.Itoa(20)), - EventTime: timestamppb.New(time.Time{}), - Watermark: timestamppb.New(time.Time{}), + Payload: &reducepb.ReduceRequest_Payload{ + Keys: []string{"client"}, + Value: []byte(strconv.Itoa(20)), + EventTime: timestamppb.New(time.Time{}), + Watermark: timestamppb.New(time.Time{}), + }, + Operation: &reducepb.ReduceRequest_WindowOperation{ + 
Event: reducepb.ReduceRequest_WindowOperation_APPEND, + Windows: []*reducepb.Window{ + { + Start: timestamppb.New(time.UnixMilli(60000)), + End: timestamppb.New(time.UnixMilli(120000)), + Slot: "slot-0", + }, + }, + }, }, { - Keys: []string{"client"}, - Value: []byte(strconv.Itoa(30)), - EventTime: timestamppb.New(time.Time{}), - Watermark: timestamppb.New(time.Time{}), + Payload: &reducepb.ReduceRequest_Payload{ + Keys: []string{"client"}, + Value: []byte(strconv.Itoa(30)), + EventTime: timestamppb.New(time.Time{}), + Watermark: timestamppb.New(time.Time{}), + }, + Operation: &reducepb.ReduceRequest_WindowOperation{ + Event: reducepb.ReduceRequest_WindowOperation_APPEND, + Windows: []*reducepb.Window{ + { + Start: timestamppb.New(time.UnixMilli(60000)), + End: timestamppb.New(time.UnixMilli(120000)), + Slot: "slot-0", + }, + }, + }, }, }, - expected: &reducepb.ReduceResponse{ - Results: []*reducepb.ReduceResponse_Result{ - { + expected: []*reducepb.ReduceResponse{ + { + Result: &reducepb.ReduceResponse_Result{ Value: []byte(strconv.Itoa(60)), }, + Window: &reducepb.Window{ + Start: timestamppb.New(time.UnixMilli(60000)), + End: timestamppb.New(time.UnixMilli(120000)), + Slot: "slot-0", + }, + EOF: false, }, }, - expectedErr: false, }, { name: "reduce_fn_forward_msg_drop_msg", - handler: ReducerFunc(func(ctx context.Context, keys []string, rch <-chan Datum, md Metadata) Messages { + handler: func(ctx context.Context, keys []string, rch <-chan Datum, md Metadata) Messages { sum := 0 for val := range rch { msgVal, _ := strconv.Atoi(string(val.Value())) sum += msgVal } return MessagesBuilder().Append(MessageToDrop()) - }), + }, input: []*reducepb.ReduceRequest{ { - Keys: []string{"client"}, - Value: []byte(strconv.Itoa(10)), - EventTime: timestamppb.New(time.Time{}), - Watermark: timestamppb.New(time.Time{}), + Payload: &reducepb.ReduceRequest_Payload{ + Keys: []string{"client"}, + Value: []byte(strconv.Itoa(10)), + EventTime: timestamppb.New(time.Time{}), + 
Watermark: timestamppb.New(time.Time{}), + }, + Operation: &reducepb.ReduceRequest_WindowOperation{ + Event: reducepb.ReduceRequest_WindowOperation_OPEN, + Windows: []*reducepb.Window{ + { + Start: timestamppb.New(time.UnixMilli(60000)), + End: timestamppb.New(time.UnixMilli(120000)), + Slot: "slot-0", + }, + }, + }, }, { - Keys: []string{"client"}, - Value: []byte(strconv.Itoa(20)), - EventTime: timestamppb.New(time.Time{}), - Watermark: timestamppb.New(time.Time{}), + Payload: &reducepb.ReduceRequest_Payload{ + Keys: []string{"client"}, + Value: []byte(strconv.Itoa(20)), + EventTime: timestamppb.New(time.Time{}), + Watermark: timestamppb.New(time.Time{}), + }, + Operation: &reducepb.ReduceRequest_WindowOperation{ + Event: reducepb.ReduceRequest_WindowOperation_APPEND, + Windows: []*reducepb.Window{ + { + Start: timestamppb.New(time.UnixMilli(60000)), + End: timestamppb.New(time.UnixMilli(120000)), + Slot: "slot-0", + }, + }, + }, }, { - Keys: []string{"client"}, - Value: []byte(strconv.Itoa(30)), - EventTime: timestamppb.New(time.Time{}), - Watermark: timestamppb.New(time.Time{}), + Payload: &reducepb.ReduceRequest_Payload{ + Keys: []string{"client"}, + Value: []byte(strconv.Itoa(30)), + EventTime: timestamppb.New(time.Time{}), + Watermark: timestamppb.New(time.Time{}), + }, + Operation: &reducepb.ReduceRequest_WindowOperation{ + Event: reducepb.ReduceRequest_WindowOperation_OPEN, + Windows: []*reducepb.Window{ + { + Start: timestamppb.New(time.UnixMilli(60000)), + End: timestamppb.New(time.UnixMilli(120000)), + Slot: "slot-0", + }, + }, + }, }, }, - expected: &reducepb.ReduceResponse{ - Results: []*reducepb.ReduceResponse_Result{ - { + expected: []*reducepb.ReduceResponse{ + { + Result: &reducepb.ReduceResponse_Result{ Tags: []string{DROP}, Value: []byte{}, }, + Window: &reducepb.Window{ + Start: timestamppb.New(time.UnixMilli(60000)), + End: timestamppb.New(time.UnixMilli(120000)), + Slot: "slot-0", + }, + EOF: false, }, }, expectedErr: false, @@ -249,7 +468,7 
@@ func TestService_ReduceFn(t *testing.T) { for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { fs := &Service{ - Reducer: tt.handler, + reducerCreatorHandle: SimpleCreatorWithReduceFn(tt.handler), } // here's a trick for testing: // because we are not using gRPC, we directly set a new incoming ctx @@ -258,7 +477,7 @@ func TestService_ReduceFn(t *testing.T) { inputCh := make(chan *reducepb.ReduceRequest) outputCh := make(chan *reducepb.ReduceResponse) - result := &reducepb.ReduceResponse{} + result := make([]*reducepb.ReduceResponse, 0) udfReduceFnStream := NewReduceFnServerTest(ctx, inputCh, outputCh) @@ -276,7 +495,9 @@ func TestService_ReduceFn(t *testing.T) { go func() { defer wg.Done() for msg := range outputCh { - result.Results = append(result.Results, msg.Results...) + if !msg.EOF { + result = append(result, msg) + } } }() @@ -292,8 +513,8 @@ func TestService_ReduceFn(t *testing.T) { } //sort and compare, since order of the output doesn't matter - sort.Slice(result.Results, func(i, j int) bool { - return string(result.Results[i].Value) < string(result.Results[j].Value) + sort.Slice(result, func(i, j int) bool { + return string(result[i].Result.Value) < string(result[j].Result.Value) }) if !reflect.DeepEqual(result, tt.expected) { diff --git a/pkg/reducer/task_manager.go b/pkg/reducer/task_manager.go new file mode 100644 index 00000000..53e37503 --- /dev/null +++ b/pkg/reducer/task_manager.go @@ -0,0 +1,170 @@ +package reducer + +import ( + "context" + "fmt" + "strings" + + v1 "github.com/numaproj/numaflow-go/pkg/apis/proto/reduce/v1" +) + +// reduceTask represents a task for a performing reduceStream operation. +type reduceTask struct { + keys []string + window *v1.Window + reducer Reducer + inputCh chan Datum + outputCh chan Message + doneCh chan struct{} +} + +// buildReduceResponse builds the reduce response from the messages. 
+func (rt *reduceTask) buildReduceResponse(message Message) *v1.ReduceResponse {
+	// every result carries the window of the task that produced it
+	return &v1.ReduceResponse{
+		Result: &v1.ReduceResponse_Result{
+			Keys:  message.Keys(),
+			Value: message.Value(),
+			Tags:  message.Tags(),
+		},
+		Window: rt.window,
+	}
+}
+
+// buildEOFResponse builds the response that marks the end of the results of the task:
+// it carries no Result, only the task's Window with EOF set to true.
+func (rt *reduceTask) buildEOFResponse() *v1.ReduceResponse {
+	return &v1.ReduceResponse{
+		Window: rt.window,
+		EOF:    true,
+	}
+}
+
+// uniqueKey returns the unique key for the reduce task to be used in the task manager to identify the task.
+// It delegates to generateKey so that the key a task is registered under (CreateTask) and the key it is
+// looked up by (AppendToTask) are always built the same way.
+func (rt *reduceTask) uniqueKey() string {
+	return generateKey(rt.window, rt.keys)
+}
+
+// reduceTaskManager manages the reduce tasks for a reduce operation.
+type reduceTaskManager struct {
+	// reducerCreatorHandle creates the Reducer that is run for a newly created task.
+	reducerCreatorHandle ReducerCreator
+	// tasks holds the tasks indexed by reduceTask.uniqueKey.
+	tasks map[string]*reduceTask
+	// responseCh is the single unbuffered channel every task writes its responses to.
+	responseCh chan *v1.ReduceResponse
+}
+
+// newReduceTaskManager returns a task manager without any task, which creates its reducers
+// through the given reducerCreatorHandle.
+func newReduceTaskManager(reducerCreatorHandle ReducerCreator) *reduceTaskManager {
+	return &reduceTaskManager{
+		reducerCreatorHandle: reducerCreatorHandle,
+		tasks:                make(map[string]*reduceTask),
+		responseCh:           make(chan *v1.ReduceResponse),
+	}
+}
+
+// CreateTask creates a new reduce task and starts the reduce operation.
+func (rtm *reduceTaskManager) CreateTask(ctx context.Context, request *v1.ReduceRequest) error {
+	// use the generated getters: they are nil-safe, so a request without an operation is
+	// rejected with an error instead of a nil pointer dereference
+	windows := request.GetOperation().GetWindows()
+	if len(windows) != 1 {
+		return fmt.Errorf("create operation error: invalid number of windows")
+	}
+	window := windows[0]
+
+	task := &reduceTask{
+		keys:     request.GetPayload().GetKeys(),
+		window:   window,
+		inputCh:  make(chan Datum),
+		outputCh: make(chan Message),
+		doneCh:   make(chan struct{}),
+	}
+
+	key := task.uniqueKey()
+	// A task may already be registered for this keyed window. Replacing it in the map would orphan it:
+	// CloseAll would never close its input channel and WaitAll would never wait for it.
+	// Hand the message to the running task instead, and only replace a task that has already finished.
+	// NOTE(review): presumably the service routes OPEN requests here - confirm against the caller.
+	if existing, ok := rtm.tasks[key]; ok {
+		select {
+		case existing.inputCh <- buildDatum(request):
+			return nil
+		case <-existing.doneCh:
+			// the previous task is done, fall through and start a fresh one
+		}
+	}
+	rtm.tasks[key] = task
+
+	md := NewMetadata(NewIntervalWindow(window.GetStart().AsTime(), window.GetEnd().AsTime()))
+
+	go func() {
+		// create a new reducer, since we got a new key, and invoke the reduce function
+		reducerHandle := rtm.reducerCreatorHandle.Create()
+		messages := reducerHandle.Reduce(ctx, task.keys, task.inputCh, md)
+
+		for _, message := range messages {
+			// write the output to the response channel, service will forward it to downstream
+			rtm.responseCh <- task.buildReduceResponse(message)
+		}
+		// send EOF
+		rtm.responseCh <- task.buildEOFResponse()
+		// close the output channel after the reduce function is done
+		close(task.outputCh)
+		// send a done signal, WaitAll blocks on it
+		close(task.doneCh)
+	}()
+
+	// write the first message to the input channel
+	task.inputCh <- buildDatum(request)
+	return nil
+}
+
+// AppendToTask writes the message to the reduce task.
+// If the task is not found, it creates a new task and starts the reduce operation.
+func (rtm *reduceTaskManager) AppendToTask(ctx context.Context, request *v1.ReduceRequest) error {
+	// use the generated getters: they are nil-safe, so a request without an operation or payload
+	// does not cause a nil pointer dereference
+	windows := request.GetOperation().GetWindows()
+	if len(windows) != 1 {
+		return fmt.Errorf("append operation error: invalid number of windows")
+	}
+
+	task, ok := rtm.tasks[generateKey(windows[0], request.GetPayload().GetKeys())]
+
+	// if the task is not found, create a new task
+	if !ok {
+		return rtm.CreateTask(ctx, request)
+	}
+
+	// the input channel is unbuffered, this blocks until the reducer of the task reads the message
+	task.inputCh <- buildDatum(request)
+	return nil
+}
+
+// OutputChannel returns the output channel for the reduce task manager to read the results.
+func (rtm *reduceTaskManager) OutputChannel() <-chan *v1.ReduceResponse {
+	return rtm.responseCh
+}
+
+// WaitAll waits for all the reduce tasks to complete and then closes the output channel.
+// It must not be called more than once, since closing an already closed channel panics.
+func (rtm *reduceTaskManager) WaitAll() {
+	tasks := make([]*reduceTask, 0, len(rtm.tasks))
+	for _, task := range rtm.tasks {
+		tasks = append(tasks, task)
+	}
+
+	for _, task := range tasks {
+		<-task.doneCh
+	}
+
+	// after all the tasks are completed, close the output channel
+	close(rtm.responseCh)
+}
+
+// CloseAll closes the input channel of every registered reduce task, which signals the
+// reducers that no more data will arrive.
+func (rtm *reduceTaskManager) CloseAll() {
+	tasks := make([]*reduceTask, 0, len(rtm.tasks))
+	for _, task := range rtm.tasks {
+		tasks = append(tasks, task)
+	}
+
+	for _, task := range tasks {
+		close(task.inputCh)
+	}
+}
+
+// generateKey builds the key that identifies a task: the window start and end in Unix
+// milliseconds followed by the keys joined with the delimiter.
+func generateKey(window *v1.Window, keys []string) string {
+	return fmt.Sprintf("%d:%d:%s",
+		window.GetStart().AsTime().UnixMilli(),
+		window.GetEnd().AsTime().UnixMilli(),
+		strings.Join(keys, delimiter))
+}
+
+// buildDatum converts the payload of the request into the Datum that is handed to the reducer.
+// Only generated getters are used, so a request without a payload yields a Datum with a nil value
+// and epoch timestamps instead of a nil pointer dereference.
+func buildDatum(request *v1.ReduceRequest) Datum {
+	payload := request.GetPayload()
+	return NewHandlerDatum(payload.GetValue(), payload.GetEventTime().AsTime(), payload.GetWatermark().AsTime())
+}
diff --git a/pkg/reducestreamer/doc.go b/pkg/reducestreamer/doc.go
new file mode 100644
index 00000000..046c779e
--- /dev/null
+++ b/pkg/reducestreamer/doc.go
@@ -0,0 +1,5 @@
+// Package reducestreamer implements the server code for reduceStream operation.
+ +// Examples: https://github.com/numaproj/numaflow-go/tree/main/pkg/reducestreamer/examples/ + +package reducestreamer diff --git a/pkg/reducestreamer/examples/counter/Dockerfile b/pkg/reducestreamer/examples/counter/Dockerfile new file mode 100644 index 00000000..7f1e3e5c --- /dev/null +++ b/pkg/reducestreamer/examples/counter/Dockerfile @@ -0,0 +1,20 @@ +#################################################################################################### +# base +#################################################################################################### +FROM alpine:3.12.3 as base +RUN apk update && apk upgrade && \ + apk add ca-certificates && \ + apk --no-cache add tzdata + +COPY dist/counter-example /bin/counter-example +RUN chmod +x /bin/counter-example + +#################################################################################################### +# counter +#################################################################################################### +FROM scratch as counter +ARG ARCH +COPY --from=base /usr/share/zoneinfo /usr/share/zoneinfo +COPY --from=base /etc/ssl/certs/ca-certificates.crt /etc/ssl/certs/ca-certificates.crt +COPY --from=base /bin/counter-example /bin/counter-example +ENTRYPOINT [ "/bin/counter-example" ] diff --git a/pkg/reducestreamer/examples/counter/Makefile b/pkg/reducestreamer/examples/counter/Makefile new file mode 100644 index 00000000..ffe3036c --- /dev/null +++ b/pkg/reducestreamer/examples/counter/Makefile @@ -0,0 +1,10 @@ +.PHONY: build +build: + CGO_ENABLED=0 GOOS=linux GOARCH=amd64 go build -v -o ./dist/counter-example main.go + +.PHONY: image +image: build + docker build -t "quay.io/numaio/numaflow-go/reduce-stream-counter:v0.5.3" --target counter . 
+ +clean: + -rm -rf ./dist diff --git a/pkg/reducestreamer/examples/counter/README.md b/pkg/reducestreamer/examples/counter/README.md new file mode 100644 index 00000000..8ba9bd9f --- /dev/null +++ b/pkg/reducestreamer/examples/counter/README.md @@ -0,0 +1,3 @@ +# Counter + +An example User Defined Function that count the incoming events and output the count every 10 events. diff --git a/pkg/reducestreamer/examples/counter/go.mod b/pkg/reducestreamer/examples/counter/go.mod new file mode 100644 index 00000000..21d79eb1 --- /dev/null +++ b/pkg/reducestreamer/examples/counter/go.mod @@ -0,0 +1,16 @@ +module counter + +go 1.20 + +require github.com/numaproj/numaflow-go v0.6.1-0.20231219080635-d096c415a42f + +require ( + github.com/golang/protobuf v1.5.3 // indirect + golang.org/x/net v0.9.0 // indirect + golang.org/x/sync v0.1.0 // indirect + golang.org/x/sys v0.7.0 // indirect + golang.org/x/text v0.9.0 // indirect + google.golang.org/genproto/googleapis/rpc v0.0.0-20230525234030-28d5490b6b19 // indirect + google.golang.org/grpc v1.57.0 // indirect + google.golang.org/protobuf v1.31.0 // indirect +) diff --git a/pkg/reducestreamer/examples/counter/go.sum b/pkg/reducestreamer/examples/counter/go.sum new file mode 100644 index 00000000..32cb64f7 --- /dev/null +++ b/pkg/reducestreamer/examples/counter/go.sum @@ -0,0 +1,30 @@ +github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= +github.com/golang/protobuf v1.5.0/go.mod h1:FsONVRAS9T7sI+LIUmWTfcYkHO4aIWwzhcaSAoJOfIk= +github.com/golang/protobuf v1.5.3 h1:KhyjKVUg7Usr/dYsdSqoFveMYd5ko72D+zANwlG1mmg= +github.com/golang/protobuf v1.5.3/go.mod h1:XVQd3VNwM+JqD3oG2Ue2ip4fOMUkwXdXDdiuN0vRsmY= +github.com/google/go-cmp v0.5.5/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= +github.com/google/go-cmp v0.5.9 h1:O2Tfq5qg4qc4AmwVlvv0oLiVAGB7enBSJ2x2DqQFi38= +github.com/numaproj/numaflow-go v0.5.3-0.20231211071430-1231c4c278e0 h1:aX6z3AIiJzA0XySqAZhP5ytZDZ3jcsQQnL81HP5mipU= 
+github.com/numaproj/numaflow-go v0.5.3-0.20231211071430-1231c4c278e0/go.mod h1:WoMt31+h3up202zTRI8c/qe42B8UbvwLe2mJH0MAlhI= +github.com/numaproj/numaflow-go v0.6.1-0.20231219080635-d096c415a42f h1:J43ekeRVzE6WGgkWl5oEQ+c4NT1i4VikMkygu4AeUYE= +github.com/numaproj/numaflow-go v0.6.1-0.20231219080635-d096c415a42f/go.mod h1:WoMt31+h3up202zTRI8c/qe42B8UbvwLe2mJH0MAlhI= +github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= +github.com/stretchr/testify v1.8.1 h1:w7B6lhMri9wdJUVmEZPGGhZzrYTPvgJArz7wNPgYKsk= +golang.org/x/net v0.9.0 h1:aWJ/m6xSmxWBx+V0XRHTlrYrPG56jKsLdTFmsSsCzOM= +golang.org/x/net v0.9.0/go.mod h1:d48xBJpPfHeWQsugry2m+kC02ZBRGRgulfHnEXEuWns= +golang.org/x/sync v0.1.0 h1:wsuoTGHzEhffawBOhz5CYhcrV4IdKZbEyZjBMuTp12o= +golang.org/x/sync v0.1.0/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sys v0.7.0 h1:3jlCCIQZPdOYu1h8BkNvLz8Kgwtae2cagcG/VamtZRU= +golang.org/x/sys v0.7.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/text v0.9.0 h1:2sjJmO8cDvYveuX97RDLsxlyUxLl+GHoLxBiRdHllBE= +golang.org/x/text v0.9.0/go.mod h1:e1OnstbJyHTd6l/uOt8jFFHp6TRDWZR/bV3emEE/zU8= +golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= +google.golang.org/genproto/googleapis/rpc v0.0.0-20230525234030-28d5490b6b19 h1:0nDDozoAU19Qb2HwhXadU8OcsiO/09cnTqhUtq2MEOM= +google.golang.org/genproto/googleapis/rpc v0.0.0-20230525234030-28d5490b6b19/go.mod h1:66JfowdXAEgad5O9NnYcsNPLCPZJD++2L9X0PCMODrA= +google.golang.org/grpc v1.57.0 h1:kfzNeI/klCGD2YPMUlaGNT3pxvYfga7smW3Vth8Zsiw= +google.golang.org/grpc v1.57.0/go.mod h1:Sd+9RMTACXwmub0zcNY2c4arhtrbBYD1AUHI/dt16Mo= +google.golang.org/protobuf v1.26.0-rc.1/go.mod h1:jlhhOSvTdKEhbULTjvd4ARK9grFBp09yW+WbY/TyQbw= +google.golang.org/protobuf v1.26.0/go.mod h1:9q0QmTI4eRPtz6boOQmLYwt+qCgq0jsYwAQnmE0givc= +google.golang.org/protobuf v1.31.0 h1:g0LDEJHgrBl9N9r17Ru3sqWhkIx2NB67okBHPwC7hs8= +google.golang.org/protobuf 
v1.31.0/go.mod h1:HV8QOd/L58Z+nl8r43ehVNZIU/HEI6OcFqwMG9pJV4I= +gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= diff --git a/pkg/reducestreamer/examples/counter/main.go b/pkg/reducestreamer/examples/counter/main.go new file mode 100644 index 00000000..9b1caf9b --- /dev/null +++ b/pkg/reducestreamer/examples/counter/main.go @@ -0,0 +1,31 @@ +package main + +import ( + "context" + "strconv" + + "github.com/numaproj/numaflow-go/pkg/reducestreamer" +) + +// reduceCounter is a ReduceStreamer that count the incoming events and output the count every 10 events. +// The output message is the count of the events. +func reduceCounter(_ context.Context, keys []string, inputCh <-chan reducestreamer.Datum, outputCh chan<- reducestreamer.Message, md reducestreamer.Metadata) { + // count the incoming events + var resultKeys = keys + var resultVal []byte + var counter = 0 + for range inputCh { + counter++ + if counter >= 10 { + resultVal = []byte(strconv.Itoa(counter)) + outputCh <- reducestreamer.NewMessage(resultVal).WithKeys(resultKeys) + counter = 0 + } + } + resultVal = []byte(strconv.Itoa(counter)) + outputCh <- reducestreamer.NewMessage(resultVal).WithKeys(resultKeys) +} + +func main() { + reducestreamer.NewServer(reducestreamer.SimpleCreatorWithReduceStreamFn(reduceCounter)).Start(context.Background()) +} diff --git a/pkg/reducestreamer/examples/sum/Dockerfile b/pkg/reducestreamer/examples/sum/Dockerfile new file mode 100644 index 00000000..4b237f86 --- /dev/null +++ b/pkg/reducestreamer/examples/sum/Dockerfile @@ -0,0 +1,20 @@ +#################################################################################################### +# base +#################################################################################################### +FROM alpine:3.12.3 as base +RUN apk update && apk upgrade && \ + apk add ca-certificates && \ + apk --no-cache add tzdata + +COPY dist/sum-example /bin/sum-example +RUN chmod +x /bin/sum-example + 
+#################################################################################################### +# sum +#################################################################################################### +FROM scratch as sum +ARG ARCH +COPY --from=base /usr/share/zoneinfo /usr/share/zoneinfo +COPY --from=base /etc/ssl/certs/ca-certificates.crt /etc/ssl/certs/ca-certificates.crt +COPY --from=base /bin/sum-example /bin/sum-example +ENTRYPOINT [ "/bin/sum-example" ] diff --git a/pkg/reducestreamer/examples/sum/Makefile b/pkg/reducestreamer/examples/sum/Makefile new file mode 100644 index 00000000..ec0a4e6a --- /dev/null +++ b/pkg/reducestreamer/examples/sum/Makefile @@ -0,0 +1,10 @@ +.PHONY: build +build: + CGO_ENABLED=0 GOOS=linux GOARCH=amd64 go build -v -o ./dist/sum-example main.go + +.PHONY: image +image: build + docker build -t "quay.io/numaio/numaflow-go/reduce-stream-sum:v0.5.3" --target sum . + +clean: + -rm -rf ./dist diff --git a/pkg/reducestreamer/examples/sum/README.md b/pkg/reducestreamer/examples/sum/README.md new file mode 100644 index 00000000..1029db38 --- /dev/null +++ b/pkg/reducestreamer/examples/sum/README.md @@ -0,0 +1,3 @@ +# Sum + +This is a User Defined Function example which sums up the values for the given keys and outputs the sum every time it reaches 100 or more (the remainder is output when the input ends). diff --git a/pkg/reducestreamer/examples/sum/go.mod b/pkg/reducestreamer/examples/sum/go.mod new file mode 100644 index 00000000..27de5f05 --- /dev/null +++ b/pkg/reducestreamer/examples/sum/go.mod @@ -0,0 +1,16 @@ +module sum + +go 1.20 + +require github.com/numaproj/numaflow-go v0.6.1-0.20231219080635-d096c415a42f + +require ( + github.com/golang/protobuf v1.5.3 // indirect + golang.org/x/net v0.9.0 // indirect + golang.org/x/sync v0.1.0 // indirect + golang.org/x/sys v0.7.0 // indirect + golang.org/x/text v0.9.0 // indirect + google.golang.org/genproto/googleapis/rpc v0.0.0-20230525234030-28d5490b6b19 // indirect + google.golang.org/grpc v1.57.0 // indirect + 
google.golang.org/protobuf v1.31.0 // indirect +) diff --git a/pkg/reducestreamer/examples/sum/go.sum b/pkg/reducestreamer/examples/sum/go.sum new file mode 100644 index 00000000..32cb64f7 --- /dev/null +++ b/pkg/reducestreamer/examples/sum/go.sum @@ -0,0 +1,30 @@ +github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= +github.com/golang/protobuf v1.5.0/go.mod h1:FsONVRAS9T7sI+LIUmWTfcYkHO4aIWwzhcaSAoJOfIk= +github.com/golang/protobuf v1.5.3 h1:KhyjKVUg7Usr/dYsdSqoFveMYd5ko72D+zANwlG1mmg= +github.com/golang/protobuf v1.5.3/go.mod h1:XVQd3VNwM+JqD3oG2Ue2ip4fOMUkwXdXDdiuN0vRsmY= +github.com/google/go-cmp v0.5.5/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= +github.com/google/go-cmp v0.5.9 h1:O2Tfq5qg4qc4AmwVlvv0oLiVAGB7enBSJ2x2DqQFi38= +github.com/numaproj/numaflow-go v0.5.3-0.20231211071430-1231c4c278e0 h1:aX6z3AIiJzA0XySqAZhP5ytZDZ3jcsQQnL81HP5mipU= +github.com/numaproj/numaflow-go v0.5.3-0.20231211071430-1231c4c278e0/go.mod h1:WoMt31+h3up202zTRI8c/qe42B8UbvwLe2mJH0MAlhI= +github.com/numaproj/numaflow-go v0.6.1-0.20231219080635-d096c415a42f h1:J43ekeRVzE6WGgkWl5oEQ+c4NT1i4VikMkygu4AeUYE= +github.com/numaproj/numaflow-go v0.6.1-0.20231219080635-d096c415a42f/go.mod h1:WoMt31+h3up202zTRI8c/qe42B8UbvwLe2mJH0MAlhI= +github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= +github.com/stretchr/testify v1.8.1 h1:w7B6lhMri9wdJUVmEZPGGhZzrYTPvgJArz7wNPgYKsk= +golang.org/x/net v0.9.0 h1:aWJ/m6xSmxWBx+V0XRHTlrYrPG56jKsLdTFmsSsCzOM= +golang.org/x/net v0.9.0/go.mod h1:d48xBJpPfHeWQsugry2m+kC02ZBRGRgulfHnEXEuWns= +golang.org/x/sync v0.1.0 h1:wsuoTGHzEhffawBOhz5CYhcrV4IdKZbEyZjBMuTp12o= +golang.org/x/sync v0.1.0/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sys v0.7.0 h1:3jlCCIQZPdOYu1h8BkNvLz8Kgwtae2cagcG/VamtZRU= +golang.org/x/sys v0.7.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/text v0.9.0 h1:2sjJmO8cDvYveuX97RDLsxlyUxLl+GHoLxBiRdHllBE= +golang.org/x/text 
v0.9.0/go.mod h1:e1OnstbJyHTd6l/uOt8jFFHp6TRDWZR/bV3emEE/zU8= +golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= +google.golang.org/genproto/googleapis/rpc v0.0.0-20230525234030-28d5490b6b19 h1:0nDDozoAU19Qb2HwhXadU8OcsiO/09cnTqhUtq2MEOM= +google.golang.org/genproto/googleapis/rpc v0.0.0-20230525234030-28d5490b6b19/go.mod h1:66JfowdXAEgad5O9NnYcsNPLCPZJD++2L9X0PCMODrA= +google.golang.org/grpc v1.57.0 h1:kfzNeI/klCGD2YPMUlaGNT3pxvYfga7smW3Vth8Zsiw= +google.golang.org/grpc v1.57.0/go.mod h1:Sd+9RMTACXwmub0zcNY2c4arhtrbBYD1AUHI/dt16Mo= +google.golang.org/protobuf v1.26.0-rc.1/go.mod h1:jlhhOSvTdKEhbULTjvd4ARK9grFBp09yW+WbY/TyQbw= +google.golang.org/protobuf v1.26.0/go.mod h1:9q0QmTI4eRPtz6boOQmLYwt+qCgq0jsYwAQnmE0givc= +google.golang.org/protobuf v1.31.0 h1:g0LDEJHgrBl9N9r17Ru3sqWhkIx2NB67okBHPwC7hs8= +google.golang.org/protobuf v1.31.0/go.mod h1:HV8QOd/L58Z+nl8r43ehVNZIU/HEI6OcFqwMG9pJV4I= +gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= diff --git a/pkg/reducestreamer/examples/sum/main.go b/pkg/reducestreamer/examples/sum/main.go new file mode 100644 index 00000000..8aa66720 --- /dev/null +++ b/pkg/reducestreamer/examples/sum/main.go @@ -0,0 +1,60 @@ +package main + +import ( + "context" + "fmt" + "log" + "strconv" + + "github.com/numaproj/numaflow-go/pkg/reducestreamer" +) + +// Sum is a reducestreamer that sum up the values for the given keys and output the sum when the sum is greater than 100. 
+type Sum struct { +} + +func (s *Sum) ReduceStream(ctx context.Context, keys []string, inputCh <-chan reducestreamer.Datum, outputCh chan<- reducestreamer.Message, md reducestreamer.Metadata) { + // sum up values for the same keys + intervalWindow := md.IntervalWindow() + _ = intervalWindow + var resultKeys = keys + var resultVal []byte + var sum = 0 + // sum up the values + for d := range inputCh { + val := d.Value() + eventTime := d.EventTime() + _ = eventTime + watermark := d.Watermark() + _ = watermark + + v, err := strconv.Atoi(string(val)) + if err != nil { + fmt.Printf("unable to convert the value to int: %v\n", err) + continue + } + sum += v + + if sum >= 100 { + resultVal = []byte(strconv.Itoa(sum)) + outputCh <- reducestreamer.NewMessage(resultVal).WithKeys(resultKeys) + sum = 0 + } + } + resultVal = []byte(strconv.Itoa(sum)) + outputCh <- reducestreamer.NewMessage(resultVal).WithKeys(resultKeys) +} + +// SumCreator is the creator for the sum reducestreamer. +type SumCreator struct{} + +func (s *SumCreator) Create() reducestreamer.ReduceStreamer { + return &Sum{} +} + +func main() { + err := reducestreamer.NewServer(&SumCreator{}).Start(context.Background()) + if err != nil { + log.Panic("unable to start the server due to: ", err) + } +} diff --git a/pkg/reducestreamer/interface.go b/pkg/reducestreamer/interface.go new file mode 100644 index 00000000..40dd4faa --- /dev/null +++ b/pkg/reducestreamer/interface.go @@ -0,0 +1,58 @@ +package reducestreamer + +import ( + "context" + "time" +) + +// Datum contains methods to get the payload information. +type Datum interface { + Value() []byte + EventTime() time.Time + Watermark() time.Time +} + +// Metadata contains methods to get the metadata for the reduceStream operation. +type Metadata interface { + IntervalWindow() IntervalWindow +} + +// IntervalWindow contains methods to get the information for a given interval window. 
+type IntervalWindow interface {
+	// StartTime returns the start time of the window.
+	StartTime() time.Time
+	// EndTime returns the end time of the window.
+	EndTime() time.Time
+}
+
+// ReduceStreamer is the interface of reduceStream function implementation.
+type ReduceStreamer interface {
+	// ReduceStream receives the keys, a channel to read the incoming Datum from, a channel to
+	// write the resulting Messages to, and the Metadata of the reduceStream operation.
+	ReduceStream(ctx context.Context, keys []string, inputCh <-chan Datum, outputCh chan<- Message, md Metadata)
+}
+
+// ReduceStreamerCreator is the interface which is used to create a ReduceStreamer.
+type ReduceStreamerCreator interface {
+	// Create creates a ReduceStreamer, will be invoked once for every keyed window.
+	Create() ReduceStreamer
+}
+
+// simpleReduceStreamerCreator is an implementation of ReduceStreamerCreator, which creates a ReduceStreamer for the given function.
+type simpleReduceStreamerCreator struct {
+	// f is the reduceStream function that every created ReduceStreamer wraps.
+	f func(ctx context.Context, keys []string, inputCh <-chan Datum, outputCh chan<- Message, md Metadata)
+}
+
+// Create creates a ReduceStreamer for the given function.
+func (s *simpleReduceStreamerCreator) Create() ReduceStreamer {
+	return reduceStreamFn(s.f)
+}
+
+// SimpleCreatorWithReduceStreamFn creates a simple ReduceStreamerCreator for the given reduceStream function.
+func SimpleCreatorWithReduceStreamFn(f func(ctx context.Context, keys []string, inputCh <-chan Datum, outputCh chan<- Message, md Metadata)) ReduceStreamerCreator {
+	return &simpleReduceStreamerCreator{f: f}
+}
+
+// reduceStreamFn is a utility type used to convert a ReduceStream function to a ReduceStreamer.
+type reduceStreamFn func(ctx context.Context, keys []string, inputCh <-chan Datum, outputCh chan<- Message, md Metadata)
+
+// ReduceStream implements the function of ReduceStreamer interface.
+func (rf reduceStreamFn) ReduceStream(ctx context.Context, keys []string, inputCh <-chan Datum, outputCh chan<- Message, md Metadata) { + rf(ctx, keys, inputCh, outputCh, md) +} diff --git a/pkg/reducestreamer/message.go b/pkg/reducestreamer/message.go new file mode 100644 index 00000000..2fdf5c64 --- /dev/null +++ b/pkg/reducestreamer/message.go @@ -0,0 +1,52 @@ +package reducestreamer + +import "fmt" + +var ( + DROP = fmt.Sprintf("%U__DROP__", '\\') // U+005C__DROP__ +) + +// Message is used to wrap the data return by reduceStream function +type Message struct { + value []byte + keys []string + tags []string +} + +// NewMessage creates a Message with value +func NewMessage(value []byte) Message { + return Message{value: value} +} + +// MessageToDrop creates a Message to be dropped +func MessageToDrop() Message { + return Message{value: []byte{}, tags: []string{DROP}} +} + +// WithKeys is used to assign the keys to the message +func (m Message) WithKeys(keys []string) Message { + m.keys = keys + return m +} + +// WithTags is used to assign the tags to the message +// tags will be used for conditional forwarding +func (m Message) WithTags(tags []string) Message { + m.tags = tags + return m +} + +// Keys returns message keys +func (m Message) Keys() []string { + return m.keys +} + +// Value returns message value +func (m Message) Value() []byte { + return m.value +} + +// Tags returns message tags +func (m Message) Tags() []string { + return m.tags +} diff --git a/pkg/reducestreamer/options.go b/pkg/reducestreamer/options.go new file mode 100644 index 00000000..3cabe0df --- /dev/null +++ b/pkg/reducestreamer/options.go @@ -0,0 +1,43 @@ +package reducestreamer + +import ( + "github.com/numaproj/numaflow-go/pkg/info" +) + +type options struct { + sockAddr string + maxMessageSize int + serverInfoFilePath string +} + +// Option is the interface to apply options. 
+type Option func(*options) + +func defaultOptions() *options { + return &options{ + sockAddr: address, + maxMessageSize: defaultMaxMessageSize, + serverInfoFilePath: info.ServerInfoFilePath, + } +} + +// WithMaxMessageSize sets the server max receive message size and the server max send message size to the given size. +func WithMaxMessageSize(size int) Option { + return func(opts *options) { + opts.maxMessageSize = size + } +} + +// WithSockAddr start the server with the given sock addr. This is mainly used for testing purposes. +func WithSockAddr(addr string) Option { + return func(opts *options) { + opts.sockAddr = addr + } +} + +// WithServerInfoFilePath sets the server info file path to the given path. +func WithServerInfoFilePath(f string) Option { + return func(opts *options) { + opts.serverInfoFilePath = f + } +} diff --git a/pkg/reducestreamer/options_test.go b/pkg/reducestreamer/options_test.go new file mode 100644 index 00000000..947d80e0 --- /dev/null +++ b/pkg/reducestreamer/options_test.go @@ -0,0 +1,18 @@ +package reducestreamer + +import ( + "testing" + + "github.com/stretchr/testify/assert" +) + +func TestWithMaxMessageSize(t *testing.T) { + var ( + size = 1024 * 1024 * 10 + opts = &options{ + maxMessageSize: defaultMaxMessageSize, + } + ) + WithMaxMessageSize(1024 * 1024 * 10)(opts) + assert.Equal(t, size, opts.maxMessageSize) +} diff --git a/pkg/reducestreamer/server.go b/pkg/reducestreamer/server.go new file mode 100644 index 00000000..b24eda57 --- /dev/null +++ b/pkg/reducestreamer/server.go @@ -0,0 +1,56 @@ +package reducestreamer + +import ( + "context" + "fmt" + "os/signal" + "syscall" + + "github.com/numaproj/numaflow-go/pkg" + reducepb "github.com/numaproj/numaflow-go/pkg/apis/proto/reduce/v1" + "github.com/numaproj/numaflow-go/pkg/shared" +) + +// server is a reduceStream gRPC server. +type server struct { + svc *Service + opts *options +} + +// NewServer creates a new reduceStream server. 
+func NewServer(r ReduceStreamerCreator, inputOptions ...Option) numaflow.Server { + opts := defaultOptions() + for _, inputOption := range inputOptions { + inputOption(opts) + } + s := new(server) + s.svc = new(Service) + s.svc.creatorHandle = r + s.opts = opts + return s +} + +// Start starts the reduceStream gRPC server. +func (r *server) Start(ctx context.Context) error { + ctxWithSignal, stop := signal.NotifyContext(ctx, syscall.SIGINT, syscall.SIGTERM) + defer stop() + + // write server info to the file + // start listening on unix domain socket + lis, err := shared.PrepareServer(r.opts.sockAddr, r.opts.serverInfoFilePath) + if err != nil { + return fmt.Errorf("failed to execute net.Listen(%q, %q): %v", uds, address, err) + } + // close the listener + defer func() { _ = lis.Close() }() + + // create a grpc server + grpcServer := shared.CreateGRPCServer(r.opts.maxMessageSize) + defer grpcServer.GracefulStop() + + // register the reduceStream service + reducepb.RegisterReduceServer(grpcServer, r.svc) + + // start the grpc server + return shared.StartGRPCServer(ctxWithSignal, grpcServer, lis) +} diff --git a/pkg/reducestreamer/server_test.go b/pkg/reducestreamer/server_test.go new file mode 100644 index 00000000..aa595492 --- /dev/null +++ b/pkg/reducestreamer/server_test.go @@ -0,0 +1,37 @@ +package reducestreamer + +import ( + "context" + "os" + "strconv" + "testing" + "time" + + "github.com/stretchr/testify/assert" +) + +func TestReduceServer_Start(t *testing.T) { + socketFile, _ := os.CreateTemp("/tmp", "numaflow-test.sock") + defer func() { + _ = os.RemoveAll(socketFile.Name()) + }() + + serverInfoFile, _ := os.CreateTemp("/tmp", "numaflow-test-info") + defer func() { + _ = os.RemoveAll(serverInfoFile.Name()) + }() + + var reduceStreamHandle = func(ctx context.Context, keys []string, rch <-chan Datum, och chan<- Message, md Metadata) { + sum := 0 + for val := range rch { + msgVal, _ := strconv.Atoi(string(val.Value())) + sum += msgVal + } + och <- 
NewMessage([]byte(strconv.Itoa(sum))) + } + // note: using actual uds connection + ctx, cancel := context.WithTimeout(context.Background(), 6*time.Second) + defer cancel() + err := NewServer(SimpleCreatorWithReduceStreamFn(reduceStreamHandle), WithSockAddr(socketFile.Name()), WithServerInfoFilePath(serverInfoFile.Name())).Start(ctx) + assert.NoError(t, err) +} diff --git a/pkg/reducestreamer/service.go b/pkg/reducestreamer/service.go new file mode 100644 index 00000000..641ce86d --- /dev/null +++ b/pkg/reducestreamer/service.go @@ -0,0 +1,101 @@ +package reducestreamer + +import ( + "context" + "io" + + "golang.org/x/sync/errgroup" + "google.golang.org/grpc/codes" + "google.golang.org/grpc/status" + "google.golang.org/protobuf/types/known/emptypb" + + reducepb "github.com/numaproj/numaflow-go/pkg/apis/proto/reduce/v1" +) + +const ( + uds = "unix" + defaultMaxMessageSize = 1024 * 1024 * 64 + address = "/var/run/numaflow/reducestream.sock" + winStartTime = "x-numaflow-win-start-time" + winEndTime = "x-numaflow-win-end-time" + delimiter = ":" +) + +// Service implements the proto gen server interface and contains the reduceStream operation handler. +type Service struct { + reducepb.UnimplementedReduceServer + creatorHandle ReduceStreamerCreator +} + +// IsReady returns true to indicate the gRPC connection is ready. +func (fs *Service) IsReady(context.Context, *emptypb.Empty) (*reducepb.ReadyResponse, error) { + return &reducepb.ReadyResponse{Ready: true}, nil +} + +// ReduceFn applies a reduce function to a request stream and streams the results. 
+//
+// Every request is routed to the task of its window and keys; OPEN creates the task and APPEND
+// feeds it. When the request stream reaches EOF the inputs of all tasks are closed, the tasks are
+// awaited and their responses are forwarded to the stream.
+//
+// On any failure (receive error, malformed request, task error) the tasks are still shut down and
+// awaited so that no goroutine outlives the call, but responses produced from that point on are
+// discarded instead of being sent as if they were complete results.
+func (fs *Service) ReduceFn(stream reducepb.Reduce_ReduceFnServer) error {
+	var (
+		ctx = stream.Context()
+		g   errgroup.Group
+		// discard is closed once the call is failing; responses read afterwards are dropped
+		discard = make(chan struct{})
+	)
+
+	taskManager := newReduceTaskManager(fs.creatorHandle)
+
+	// err group for the go routine which reads from the output channel and sends to the stream.
+	// The channel is always drained until it is closed: if this goroutine stopped reading after a
+	// failed Send, the tasks would block on their next response and WaitAll would never return.
+	g.Go(func() error {
+		var sendErr error
+		for output := range taskManager.OutputChannel() {
+			select {
+			case <-discard:
+				continue
+			default:
+			}
+			if sendErr != nil {
+				continue
+			}
+			sendErr = stream.Send(output)
+		}
+		return sendErr
+	})
+
+	// finish closes the input of every task, waits for the tasks to complete and then for the
+	// forwarding goroutine to return.
+	finish := func() error {
+		taskManager.CloseAll()
+		taskManager.WaitAll()
+		return g.Wait()
+	}
+	// fail releases every goroutine started for this call without emitting further responses and
+	// hands back the error that should be returned to the client.
+	fail := func(err error) error {
+		close(discard)
+		_ = finish()
+		return err
+	}
+
+	// read messages from the stream and write the messages to corresponding channels
+	// if the channel is not created, create the channel and invoke the reduceFn
+	for {
+		d, recvErr := stream.Recv()
+		// if EOF, stop reading; the channels are closed by finish below
+		if recvErr == io.EOF {
+			break
+		}
+		if recvErr != nil {
+			// the error here is returned by stream.Recv()
+			// it's already a gRPC error
+			return fail(recvErr)
+		}
+		if d.GetOperation() == nil {
+			// without this check the request would be dereferenced through a nil operation
+			return fail(status.Error(codes.InvalidArgument, "reduce request has no window operation"))
+		}
+
+		// for Aligned, its just open or append operation
+		// close signal will be sent to all the reducers when grpc
+		// input stream gets EOF.
+		switch d.GetOperation().GetEvent() {
+		case reducepb.ReduceRequest_WindowOperation_OPEN:
+			// create a new reduce task and start the reduce operation
+			if err := taskManager.CreateTask(ctx, d); err != nil {
+				// status.Error, not Errorf: the message is data and must not be parsed as a format string
+				return fail(status.Error(codes.Internal, err.Error()))
+			}
+		case reducepb.ReduceRequest_WindowOperation_APPEND:
+			// append the datum to the reduce task
+			if err := taskManager.AppendToTask(ctx, d); err != nil {
+				return fail(status.Error(codes.Internal, err.Error()))
+			}
+		}
+	}
+
+	// close the inputs, wait for the tasks and for the go routine which reads from the output
+	// channel and sends to the stream to return
+	if err := finish(); err != nil {
+		return status.Error(codes.Internal, err.Error())
+	}
+
+	return nil
+}
diff --git a/pkg/reducestreamer/service_test.go b/pkg/reducestreamer/service_test.go
new file mode 100644
index 00000000..498151cf
--- /dev/null
+++ b/pkg/reducestreamer/service_test.go
@@ -0,0 +1,525 @@
+package reducestreamer
+
+import (
+	"context"
+	"io"
+	"reflect"
+	"sort"
+	"strconv"
+	"sync"
+	"testing"
+	"time"
+
+	"google.golang.org/grpc"
+	grpcmd "google.golang.org/grpc/metadata"
+	"google.golang.org/protobuf/types/known/timestamppb"
+
+	reducepb "github.com/numaproj/numaflow-go/pkg/apis/proto/reduce/v1"
+)
+
+// ReduceFnServerTest is an in-memory stand-in for the ReduceFn server stream: requests are read
+// from inputCh and responses are written to outputCh.
+type ReduceFnServerTest struct {
+	ctx      context.Context
+	inputCh  chan *reducepb.ReduceRequest
+	outputCh chan *reducepb.ReduceResponse
+	grpc.ServerStream
+}
+
+// NewReduceFnServerTest creates a ReduceFnServerTest backed by the given context and channels.
+func NewReduceFnServerTest(ctx context.Context,
+	inputCh chan *reducepb.ReduceRequest,
+	outputCh chan *reducepb.ReduceResponse) *ReduceFnServerTest {
+	return &ReduceFnServerTest{
+		ctx:      ctx,
+		inputCh:  inputCh,
+		outputCh: outputCh,
+	}
+}
+
+// Send writes the response to outputCh and never fails.
+func (u *ReduceFnServerTest) Send(list *reducepb.ReduceResponse) error {
+	u.outputCh <- list
+	return nil
+}
+
+// Recv returns the next request from inputCh, or io.EOF once inputCh is closed.
+func (u *ReduceFnServerTest) Recv() (*reducepb.ReduceRequest, error) {
+	val, ok := <-u.inputCh
+	if !ok {
+		return val, io.EOF
+	}
+	return val, nil
+}
+
+func (u 
*ReduceFnServerTest) Context() context.Context { + return u.ctx +} + +func TestService_ReduceFn(t *testing.T) { + + tests := []struct { + name string + handler func(ctx context.Context, keys []string, rch <-chan Datum, och chan<- Message, md Metadata) + input []*reducepb.ReduceRequest + expected []*reducepb.ReduceResponse + expectedErr bool + }{ + { + name: "reduce_fn_forward_msg_same_keys", + handler: func(ctx context.Context, keys []string, rch <-chan Datum, och chan<- Message, md Metadata) { + sum := 0 + for val := range rch { + msgVal, _ := strconv.Atoi(string(val.Value())) + sum += msgVal + } + och <- NewMessage([]byte(strconv.Itoa(sum))).WithKeys([]string{keys[0] + "_test"}) + }, + input: []*reducepb.ReduceRequest{ + { + Payload: &reducepb.ReduceRequest_Payload{ + Keys: []string{"client"}, + Value: []byte(strconv.Itoa(10)), + EventTime: timestamppb.New(time.Time{}), + Watermark: timestamppb.New(time.Time{}), + }, + Operation: &reducepb.ReduceRequest_WindowOperation{ + Event: reducepb.ReduceRequest_WindowOperation_OPEN, + Windows: []*reducepb.Window{ + { + Start: timestamppb.New(time.UnixMilli(60000)), + End: timestamppb.New(time.UnixMilli(120000)), + Slot: "slot-0", + }, + }, + }, + }, + { + Payload: &reducepb.ReduceRequest_Payload{ + Keys: []string{"client"}, + Value: []byte(strconv.Itoa(20)), + EventTime: timestamppb.New(time.Time{}), + Watermark: timestamppb.New(time.Time{}), + }, + Operation: &reducepb.ReduceRequest_WindowOperation{ + Event: reducepb.ReduceRequest_WindowOperation_APPEND, + Windows: []*reducepb.Window{ + { + Start: timestamppb.New(time.UnixMilli(60000)), + End: timestamppb.New(time.UnixMilli(120000)), + Slot: "slot-0", + }, + }, + }, + }, + { + Payload: &reducepb.ReduceRequest_Payload{ + Keys: []string{"client"}, + Value: []byte(strconv.Itoa(30)), + EventTime: timestamppb.New(time.Time{}), + Watermark: timestamppb.New(time.Time{}), + }, + Operation: &reducepb.ReduceRequest_WindowOperation{ + Event: 
reducepb.ReduceRequest_WindowOperation_APPEND, + Windows: []*reducepb.Window{ + { + Start: timestamppb.New(time.UnixMilli(60000)), + End: timestamppb.New(time.UnixMilli(120000)), + Slot: "slot-0", + }, + }, + }, + }, + }, + expected: []*reducepb.ReduceResponse{ + { + Result: &reducepb.ReduceResponse_Result{ + Keys: []string{"client_test"}, + Value: []byte(strconv.Itoa(60)), + }, + Window: &reducepb.Window{ + Start: timestamppb.New(time.UnixMilli(60000)), + End: timestamppb.New(time.UnixMilli(120000)), + Slot: "slot-0", + }, + EOF: false, + }, + }, + expectedErr: false, + }, + { + name: "reduce_fn_forward_msg_multiple_keys", + handler: func(ctx context.Context, keys []string, rch <-chan Datum, och chan<- Message, md Metadata) { + sum := 0 + for val := range rch { + msgVal, _ := strconv.Atoi(string(val.Value())) + sum += msgVal + } + och <- NewMessage([]byte(strconv.Itoa(sum))).WithKeys([]string{keys[0] + "_test"}) + }, + input: []*reducepb.ReduceRequest{ + { + Payload: &reducepb.ReduceRequest_Payload{ + Keys: []string{"client1"}, + Value: []byte(strconv.Itoa(10)), + EventTime: timestamppb.New(time.Time{}), + Watermark: timestamppb.New(time.Time{}), + }, + Operation: &reducepb.ReduceRequest_WindowOperation{ + Event: reducepb.ReduceRequest_WindowOperation_OPEN, + Windows: []*reducepb.Window{ + { + Start: timestamppb.New(time.UnixMilli(60000)), + End: timestamppb.New(time.UnixMilli(120000)), + Slot: "slot-0", + }, + }, + }, + }, + { + Payload: &reducepb.ReduceRequest_Payload{ + Keys: []string{"client2"}, + Value: []byte(strconv.Itoa(20)), + EventTime: timestamppb.New(time.Time{}), + Watermark: timestamppb.New(time.Time{}), + }, + Operation: &reducepb.ReduceRequest_WindowOperation{ + Event: reducepb.ReduceRequest_WindowOperation_OPEN, + Windows: []*reducepb.Window{ + { + Start: timestamppb.New(time.UnixMilli(60000)), + End: timestamppb.New(time.UnixMilli(120000)), + Slot: "slot-0", + }, + }, + }, + }, + { + Payload: &reducepb.ReduceRequest_Payload{ + Keys: 
[]string{"client3"}, + Value: []byte(strconv.Itoa(30)), + EventTime: timestamppb.New(time.Time{}), + Watermark: timestamppb.New(time.Time{}), + }, + Operation: &reducepb.ReduceRequest_WindowOperation{ + Event: reducepb.ReduceRequest_WindowOperation_APPEND, + Windows: []*reducepb.Window{ + { + Start: timestamppb.New(time.UnixMilli(60000)), + End: timestamppb.New(time.UnixMilli(120000)), + Slot: "slot-0", + }, + }, + }, + }, + { + Payload: &reducepb.ReduceRequest_Payload{ + Keys: []string{"client1"}, + Value: []byte(strconv.Itoa(10)), + EventTime: timestamppb.New(time.Time{}), + Watermark: timestamppb.New(time.Time{}), + }, + Operation: &reducepb.ReduceRequest_WindowOperation{ + Event: reducepb.ReduceRequest_WindowOperation_APPEND, + Windows: []*reducepb.Window{ + { + Start: timestamppb.New(time.UnixMilli(60000)), + End: timestamppb.New(time.UnixMilli(120000)), + Slot: "slot-0", + }, + }, + }, + }, + { + Payload: &reducepb.ReduceRequest_Payload{ + Keys: []string{"client2"}, + Value: []byte(strconv.Itoa(20)), + EventTime: timestamppb.New(time.Time{}), + Watermark: timestamppb.New(time.Time{}), + }, + Operation: &reducepb.ReduceRequest_WindowOperation{ + Event: reducepb.ReduceRequest_WindowOperation_APPEND, + Windows: []*reducepb.Window{ + { + Start: timestamppb.New(time.UnixMilli(60000)), + End: timestamppb.New(time.UnixMilli(120000)), + Slot: "slot-0", + }, + }, + }, + }, + { + Payload: &reducepb.ReduceRequest_Payload{ + Keys: []string{"client3"}, + Value: []byte(strconv.Itoa(30)), + EventTime: timestamppb.New(time.Time{}), + Watermark: timestamppb.New(time.Time{}), + }, + Operation: &reducepb.ReduceRequest_WindowOperation{ + Event: reducepb.ReduceRequest_WindowOperation_APPEND, + Windows: []*reducepb.Window{ + { + Start: timestamppb.New(time.UnixMilli(60000)), + End: timestamppb.New(time.UnixMilli(120000)), + Slot: "slot-0", + }, + }, + }, + }, + }, + expected: []*reducepb.ReduceResponse{ + { + Result: &reducepb.ReduceResponse_Result{ + Keys: 
[]string{"client1_test"}, + Value: []byte(strconv.Itoa(20)), + }, + Window: &reducepb.Window{ + Start: timestamppb.New(time.UnixMilli(60000)), + End: timestamppb.New(time.UnixMilli(120000)), + Slot: "slot-0", + }, + EOF: false, + }, + { + Result: &reducepb.ReduceResponse_Result{ + Keys: []string{"client2_test"}, + Value: []byte(strconv.Itoa(40)), + }, + Window: &reducepb.Window{ + Start: timestamppb.New(time.UnixMilli(60000)), + End: timestamppb.New(time.UnixMilli(120000)), + Slot: "slot-0", + }, + EOF: false, + }, + { + Result: &reducepb.ReduceResponse_Result{ + Keys: []string{"client3_test"}, + Value: []byte(strconv.Itoa(60)), + }, + Window: &reducepb.Window{ + Start: timestamppb.New(time.UnixMilli(60000)), + End: timestamppb.New(time.UnixMilli(120000)), + Slot: "slot-0", + }, + EOF: false, + }, + }, + expectedErr: false, + }, + { + name: "reduce_fn_forward_msg_forward_to_all", + handler: func(ctx context.Context, keys []string, rch <-chan Datum, och chan<- Message, md Metadata) { + sum := 0 + for val := range rch { + msgVal, _ := strconv.Atoi(string(val.Value())) + sum += msgVal + } + och <- NewMessage([]byte(strconv.Itoa(sum))) + }, + input: []*reducepb.ReduceRequest{ + { + Payload: &reducepb.ReduceRequest_Payload{ + Keys: []string{"client"}, + Value: []byte(strconv.Itoa(10)), + EventTime: timestamppb.New(time.Time{}), + Watermark: timestamppb.New(time.Time{}), + }, + Operation: &reducepb.ReduceRequest_WindowOperation{ + Event: reducepb.ReduceRequest_WindowOperation_OPEN, + Windows: []*reducepb.Window{ + { + Start: timestamppb.New(time.UnixMilli(60000)), + End: timestamppb.New(time.UnixMilli(120000)), + Slot: "slot-0", + }, + }, + }, + }, + { + Payload: &reducepb.ReduceRequest_Payload{ + Keys: []string{"client"}, + Value: []byte(strconv.Itoa(20)), + EventTime: timestamppb.New(time.Time{}), + Watermark: timestamppb.New(time.Time{}), + }, + Operation: &reducepb.ReduceRequest_WindowOperation{ + Event: reducepb.ReduceRequest_WindowOperation_APPEND, + Windows: 
[]*reducepb.Window{ + { + Start: timestamppb.New(time.UnixMilli(60000)), + End: timestamppb.New(time.UnixMilli(120000)), + Slot: "slot-0", + }, + }, + }, + }, + { + Payload: &reducepb.ReduceRequest_Payload{ + Keys: []string{"client"}, + Value: []byte(strconv.Itoa(30)), + EventTime: timestamppb.New(time.Time{}), + Watermark: timestamppb.New(time.Time{}), + }, + Operation: &reducepb.ReduceRequest_WindowOperation{ + Event: reducepb.ReduceRequest_WindowOperation_APPEND, + Windows: []*reducepb.Window{ + { + Start: timestamppb.New(time.UnixMilli(60000)), + End: timestamppb.New(time.UnixMilli(120000)), + Slot: "slot-0", + }, + }, + }, + }, + }, + expected: []*reducepb.ReduceResponse{ + { + Result: &reducepb.ReduceResponse_Result{ + Value: []byte(strconv.Itoa(60)), + }, + Window: &reducepb.Window{ + Start: timestamppb.New(time.UnixMilli(60000)), + End: timestamppb.New(time.UnixMilli(120000)), + Slot: "slot-0", + }, + EOF: false, + }, + }, + }, + { + name: "reduce_fn_forward_msg_drop_msg", + handler: func(ctx context.Context, keys []string, rch <-chan Datum, och chan<- Message, md Metadata) { + sum := 0 + for val := range rch { + msgVal, _ := strconv.Atoi(string(val.Value())) + sum += msgVal + } + och <- MessageToDrop() + }, + input: []*reducepb.ReduceRequest{ + { + Payload: &reducepb.ReduceRequest_Payload{ + Keys: []string{"client"}, + Value: []byte(strconv.Itoa(10)), + EventTime: timestamppb.New(time.Time{}), + Watermark: timestamppb.New(time.Time{}), + }, + Operation: &reducepb.ReduceRequest_WindowOperation{ + Event: reducepb.ReduceRequest_WindowOperation_OPEN, + Windows: []*reducepb.Window{ + { + Start: timestamppb.New(time.UnixMilli(60000)), + End: timestamppb.New(time.UnixMilli(120000)), + Slot: "slot-0", + }, + }, + }, + }, + { + Payload: &reducepb.ReduceRequest_Payload{ + Keys: []string{"client"}, + Value: []byte(strconv.Itoa(20)), + EventTime: timestamppb.New(time.Time{}), + Watermark: timestamppb.New(time.Time{}), + }, + Operation: 
&reducepb.ReduceRequest_WindowOperation{ + Event: reducepb.ReduceRequest_WindowOperation_APPEND, + Windows: []*reducepb.Window{ + { + Start: timestamppb.New(time.UnixMilli(60000)), + End: timestamppb.New(time.UnixMilli(120000)), + Slot: "slot-0", + }, + }, + }, + }, + { + Payload: &reducepb.ReduceRequest_Payload{ + Keys: []string{"client"}, + Value: []byte(strconv.Itoa(30)), + EventTime: timestamppb.New(time.Time{}), + Watermark: timestamppb.New(time.Time{}), + }, + Operation: &reducepb.ReduceRequest_WindowOperation{ + Event: reducepb.ReduceRequest_WindowOperation_OPEN, + Windows: []*reducepb.Window{ + { + Start: timestamppb.New(time.UnixMilli(60000)), + End: timestamppb.New(time.UnixMilli(120000)), + Slot: "slot-0", + }, + }, + }, + }, + }, + expected: []*reducepb.ReduceResponse{ + { + Result: &reducepb.ReduceResponse_Result{ + Tags: []string{DROP}, + Value: []byte{}, + }, + Window: &reducepb.Window{ + Start: timestamppb.New(time.UnixMilli(60000)), + End: timestamppb.New(time.UnixMilli(120000)), + Slot: "slot-0", + }, + EOF: false, + }, + }, + expectedErr: false, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + fs := &Service{ + creatorHandle: SimpleCreatorWithReduceStreamFn(tt.handler), + } + // here's a trick for testing: + // because we are not using gRPC, we directly set a new incoming ctx + // instead of the regular outgoing context in the real gRPC connection. 
+ ctx := grpcmd.NewIncomingContext(context.Background(), grpcmd.New(map[string]string{winStartTime: "60000", winEndTime: "120000"})) + + inputCh := make(chan *reducepb.ReduceRequest) + outputCh := make(chan *reducepb.ReduceResponse) + result := make([]*reducepb.ReduceResponse, 0) + + udfReduceFnStream := NewReduceFnServerTest(ctx, inputCh, outputCh) + + var wg sync.WaitGroup + var err error + + wg.Add(1) + go func() { + defer wg.Done() + err = fs.ReduceFn(udfReduceFnStream) + close(outputCh) + }() + + wg.Add(1) + go func() { + defer wg.Done() + for msg := range outputCh { + if !msg.EOF { + result = append(result, msg) + } + } + }() + + for _, val := range tt.input { + udfReduceFnStream.inputCh <- val + } + close(udfReduceFnStream.inputCh) + wg.Wait() + + if (err != nil) != tt.expectedErr { + t.Errorf("ReduceFn() error = %v, wantErr %v", err, tt.expectedErr) + return + } + + //sort and compare, since order of the output doesn't matter + sort.Slice(result, func(i, j int) bool { + return string(result[i].Result.Value) < string(result[j].Result.Value) + }) + + if !reflect.DeepEqual(result, tt.expected) { + t.Errorf("ReduceFn() got = %v, want %v", result, tt.expected) + } + }) + } +} diff --git a/pkg/reducestreamer/task_manager.go b/pkg/reducestreamer/task_manager.go new file mode 100644 index 00000000..a6d5feee --- /dev/null +++ b/pkg/reducestreamer/task_manager.go @@ -0,0 +1,177 @@ +package reducestreamer + +import ( + "context" + "fmt" + "strings" + "sync" + + v1 "github.com/numaproj/numaflow-go/pkg/apis/proto/reduce/v1" +) + +// reduceStreamTask represents a task for a performing reduceStream operation. +type reduceStreamTask struct { + keys []string + window *v1.Window + reduceStreamer ReduceStreamer + inputCh chan Datum + outputCh chan Message + doneCh chan struct{} +} + +// buildReduceResponse builds the reduce response from the messages. 
+func (rt *reduceStreamTask) buildReduceResponse(message Message) *v1.ReduceResponse { + + response := &v1.ReduceResponse{ + Result: &v1.ReduceResponse_Result{ + Keys: message.Keys(), + Value: message.Value(), + Tags: message.Tags(), + }, + Window: rt.window, + } + + return response +} + +func (rt *reduceStreamTask) buildEOFResponse() *v1.ReduceResponse { + response := &v1.ReduceResponse{ + Window: rt.window, + EOF: true, + } + + return response +} + +// uniqueKey returns the unique key for the reduceStream task to be used in the task manager to identify the task. +func (rt *reduceStreamTask) uniqueKey() string { + return fmt.Sprintf("%d:%d:%s", + rt.window.GetStart().AsTime().UnixMilli(), + rt.window.GetEnd().AsTime().UnixMilli(), + strings.Join(rt.keys, delimiter)) +} + +// reduceStreamTaskManager manages the reduceStream tasks. +type reduceStreamTaskManager struct { + creatorHandle ReduceStreamerCreator + tasks map[string]*reduceStreamTask + responseCh chan *v1.ReduceResponse +} + +func newReduceTaskManager(reduceStreamerCreator ReduceStreamerCreator) *reduceStreamTaskManager { + return &reduceStreamTaskManager{ + creatorHandle: reduceStreamerCreator, + tasks: make(map[string]*reduceStreamTask), + responseCh: make(chan *v1.ReduceResponse), + } +} + +// CreateTask creates a new reduceStream task and starts the reduceStream operation. 
+func (rtm *reduceStreamTaskManager) CreateTask(ctx context.Context, request *v1.ReduceRequest) error { + if len(request.Operation.Windows) != 1 { + return fmt.Errorf("create operation error: invalid number of windows") + } + + md := NewMetadata(NewIntervalWindow(request.Operation.Windows[0].GetStart().AsTime(), + request.Operation.Windows[0].GetEnd().AsTime())) + + task := &reduceStreamTask{ + keys: request.GetPayload().GetKeys(), + window: request.Operation.Windows[0], + inputCh: make(chan Datum), + outputCh: make(chan Message), + doneCh: make(chan struct{}), + } + + key := task.uniqueKey() + rtm.tasks[key] = task + + go func() { + var wg sync.WaitGroup + wg.Add(1) + go func() { + defer wg.Done() + for message := range task.outputCh { + // write the output to the output channel, service will forward it to downstream + rtm.responseCh <- task.buildReduceResponse(message) + } + // send EOF + rtm.responseCh <- task.buildEOFResponse() + }() + + reduceStreamerHandle := rtm.creatorHandle.Create() + // invoke the reduceStream function + reduceStreamerHandle.ReduceStream(ctx, request.GetPayload().GetKeys(), task.inputCh, task.outputCh, md) + // close the output channel after the reduceStream function is done + close(task.outputCh) + // wait for the output to be forwarded + wg.Wait() + // send a done signal + close(task.doneCh) + }() + + // write the first message to the input channel + task.inputCh <- buildDatum(request) + return nil +} + +// AppendToTask writes the message to the reduceStream task. +// If the task is not found, it creates a new task and starts the reduceStream operation. 
+//
+// The request must carry exactly one window, otherwise an error is returned. An error is also
+// returned when ctx is done before the datum could be delivered to the task.
+func (rtm *reduceStreamTaskManager) AppendToTask(ctx context.Context, request *v1.ReduceRequest) error {
+	// the generated getters are nil-safe, so a request without an operation or payload is handled
+	// without a nil pointer dereference
+	windows := request.GetOperation().GetWindows()
+	if len(windows) != 1 {
+		return fmt.Errorf("append operation error: invalid number of windows")
+	}
+
+	task, ok := rtm.tasks[generateKey(windows[0], request.GetPayload().GetKeys())]
+	// if the task is not found, create a new task
+	if !ok {
+		return rtm.CreateTask(ctx, request)
+	}
+
+	select {
+	case task.inputCh <- buildDatum(request):
+		return nil
+	case <-task.doneCh:
+		// the reducer has already returned and nobody reads the input any more; sending
+		// unconditionally would block the caller forever
+		return nil
+	case <-ctx.Done():
+		return ctx.Err()
+	}
+}
+
+// OutputChannel returns the output channel for the reduceStream task manager to read the results.
+func (rtm *reduceStreamTaskManager) OutputChannel() <-chan *v1.ReduceResponse {
+	return rtm.responseCh
+}
+
+// WaitAll waits for all the reduceStream tasks to complete.
+// It closes the output channel afterwards, so it must be called only once.
+func (rtm *reduceStreamTaskManager) WaitAll() {
+	for _, task := range rtm.tasks {
+		<-task.doneCh
+	}
+	// after all the tasks are completed, close the output channel
+	close(rtm.responseCh)
+}
+
+// CloseAll closes all the reduceStream tasks.
+// It closes the input channel of every task, so it must be called only once.
+func (rtm *reduceStreamTaskManager) CloseAll() {
+	for _, task := range rtm.tasks {
+		close(task.inputCh)
+	}
+}
+
+// generateKey returns the key that identifies a task in the task manager: the start and end of the
+// window in Unix milliseconds followed by the keys, all joined with the delimiter.
+func generateKey(window *v1.Window, keys []string) string {
+	return fmt.Sprintf("%d:%d:%s",
+		window.GetStart().AsTime().UnixMilli(),
+		window.GetEnd().AsTime().UnixMilli(),
+		strings.Join(keys, delimiter))
+}
+
+// buildDatum builds the Datum handed to the reduceStream function from the payload of the request.
+func buildDatum(request *v1.ReduceRequest) Datum {
+	// go through the nil-safe getters: reading the fields directly panics on a request without payload
+	payload := request.GetPayload()
+	return NewHandlerDatum(payload.GetValue(), payload.GetEventTime().AsTime(), payload.GetWatermark().AsTime())
+}
diff --git a/pkg/reducestreamer/types.go b/pkg/reducestreamer/types.go
new file mode 100644
index 00000000..1f19c27a
--- /dev/null
+++ b/pkg/reducestreamer/types.go
@@ -0,0 +1,65 @@
+package reducestreamer
+
+import "time"
+
+// handlerDatum implements the Datum interface and is used in the reduceStream functions.
+type handlerDatum struct {
+	value     []byte
+	eventTime time.Time
+	watermark time.Time
+}
+
+// NewHandlerDatum creates a Datum with the given value, event time and watermark.
+func NewHandlerDatum(value []byte, eventTime time.Time, watermark time.Time) Datum {
+	return &handlerDatum{
+		value:     value,
+		eventTime: eventTime,
+		watermark: watermark,
+	}
+}
+
+// Value returns the value of the datum.
+func (h *handlerDatum) Value() []byte {
+	return h.value
+}
+
+// EventTime returns the event time of the datum.
+func (h *handlerDatum) EventTime() time.Time {
+	return h.eventTime
+}
+
+// Watermark returns the watermark of the datum.
+func (h *handlerDatum) Watermark() time.Time {
+	return h.watermark
+}
+
+// intervalWindow implements IntervalWindow interface which will be passed as metadata
+// to reduce handlers
+type intervalWindow struct {
+	startTime time.Time
+	endTime   time.Time
+}
+
+// NewIntervalWindow creates an IntervalWindow with the given start and end time.
+func NewIntervalWindow(startTime time.Time, endTime time.Time) IntervalWindow {
+	return &intervalWindow{
+		startTime: startTime,
+		endTime:   endTime,
+	}
+}
+
+// StartTime returns the start time of the window.
+func (i *intervalWindow) StartTime() time.Time {
+	return i.startTime
+}
+
+// EndTime returns the end time of the window.
+func (i *intervalWindow) EndTime() time.Time {
+	return i.endTime
+}
+
+// metadata implements Metadata interface which will be passed to 
reduceStream function.
+type metadata struct {
+	intervalWindow IntervalWindow
+}
+
+// NewMetadata creates a Metadata which holds the given window.
+func NewMetadata(window IntervalWindow) Metadata {
+	return &metadata{intervalWindow: window}
+}
+
+// IntervalWindow returns the window held by the metadata.
+func (m *metadata) IntervalWindow() IntervalWindow {
+	return m.intervalWindow
+}
diff --git a/pkg/sessionreducer/doc.go b/pkg/sessionreducer/doc.go
new file mode 100644
index 00000000..ddfbc457
--- /dev/null
+++ b/pkg/sessionreducer/doc.go
@@ -0,0 +1,5 @@
+// Package sessionreducer implements the server code for sessionReduce operation.
+//
+// Examples: https://github.com/numaproj/numaflow-go/tree/main/pkg/sessionreducer/examples/
+//
+package sessionreducer
diff --git a/pkg/sessionreducer/examples/counter/Dockerfile b/pkg/sessionreducer/examples/counter/Dockerfile
new file mode 100644
index 00000000..7f1e3e5c
--- /dev/null
+++ b/pkg/sessionreducer/examples/counter/Dockerfile
@@ -0,0 +1,20 @@
+####################################################################################################
+# base
+####################################################################################################
+FROM alpine:3.12.3 as base
+RUN apk update && apk upgrade && \
+    apk add ca-certificates && \
+    apk --no-cache add tzdata
+
+COPY dist/counter-example /bin/counter-example
+RUN chmod +x /bin/counter-example
+
+####################################################################################################
+# counter
+####################################################################################################
+FROM scratch as counter
+ARG ARCH
+COPY --from=base /usr/share/zoneinfo /usr/share/zoneinfo
+COPY --from=base /etc/ssl/certs/ca-certificates.crt /etc/ssl/certs/ca-certificates.crt
+COPY --from=base /bin/counter-example /bin/counter-example
+ENTRYPOINT [ "/bin/counter-example" ]
diff --git a/pkg/sessionreducer/examples/counter/Makefile b/pkg/sessionreducer/examples/counter/Makefile
new file mode 100644
index 00000000..eb4555da
--- /dev/null
+++ 
b/pkg/sessionreducer/examples/counter/Makefile @@ -0,0 +1,10 @@ +.PHONY: build +build: + CGO_ENABLED=0 GOOS=linux GOARCH=amd64 go build -v -o ./dist/counter-example main.go + +.PHONY: image +image: build + docker build -t "quay.io/numaio/numaflow-go/session-counter:v0.6.1" --target counter . + +clean: + -rm -rf ./dist diff --git a/pkg/sessionreducer/examples/counter/README.md b/pkg/sessionreducer/examples/counter/README.md new file mode 100644 index 00000000..5a72f269 --- /dev/null +++ b/pkg/sessionreducer/examples/counter/README.md @@ -0,0 +1,3 @@ +# Counter + +An example User Defined Function that counts the number of events. diff --git a/pkg/sessionreducer/examples/counter/go.mod b/pkg/sessionreducer/examples/counter/go.mod new file mode 100644 index 00000000..85215d54 --- /dev/null +++ b/pkg/sessionreducer/examples/counter/go.mod @@ -0,0 +1,19 @@ +module counter + +go 1.20 + +require ( + github.com/numaproj/numaflow-go v0.6.1-0.20231219080635-d096c415a42f + go.uber.org/atomic v1.11.0 +) + +require ( + github.com/golang/protobuf v1.5.3 // indirect + golang.org/x/net v0.9.0 // indirect + golang.org/x/sync v0.1.0 // indirect + golang.org/x/sys v0.7.0 // indirect + golang.org/x/text v0.9.0 // indirect + google.golang.org/genproto/googleapis/rpc v0.0.0-20230525234030-28d5490b6b19 // indirect + google.golang.org/grpc v1.57.0 // indirect + google.golang.org/protobuf v1.31.0 // indirect +) diff --git a/pkg/sessionreducer/examples/counter/go.sum b/pkg/sessionreducer/examples/counter/go.sum new file mode 100644 index 00000000..00ceacec --- /dev/null +++ b/pkg/sessionreducer/examples/counter/go.sum @@ -0,0 +1,32 @@ +github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= +github.com/golang/protobuf v1.5.0/go.mod h1:FsONVRAS9T7sI+LIUmWTfcYkHO4aIWwzhcaSAoJOfIk= +github.com/golang/protobuf v1.5.3 h1:KhyjKVUg7Usr/dYsdSqoFveMYd5ko72D+zANwlG1mmg= +github.com/golang/protobuf v1.5.3/go.mod h1:XVQd3VNwM+JqD3oG2Ue2ip4fOMUkwXdXDdiuN0vRsmY= 
+github.com/google/go-cmp v0.5.5/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= +github.com/google/go-cmp v0.5.9 h1:O2Tfq5qg4qc4AmwVlvv0oLiVAGB7enBSJ2x2DqQFi38= +github.com/numaproj/numaflow-go v0.5.3-0.20231211071430-1231c4c278e0 h1:aX6z3AIiJzA0XySqAZhP5ytZDZ3jcsQQnL81HP5mipU= +github.com/numaproj/numaflow-go v0.5.3-0.20231211071430-1231c4c278e0/go.mod h1:WoMt31+h3up202zTRI8c/qe42B8UbvwLe2mJH0MAlhI= +github.com/numaproj/numaflow-go v0.6.1-0.20231219080635-d096c415a42f h1:J43ekeRVzE6WGgkWl5oEQ+c4NT1i4VikMkygu4AeUYE= +github.com/numaproj/numaflow-go v0.6.1-0.20231219080635-d096c415a42f/go.mod h1:WoMt31+h3up202zTRI8c/qe42B8UbvwLe2mJH0MAlhI= +github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= +github.com/stretchr/testify v1.8.1 h1:w7B6lhMri9wdJUVmEZPGGhZzrYTPvgJArz7wNPgYKsk= +go.uber.org/atomic v1.11.0 h1:ZvwS0R+56ePWxUNi+Atn9dWONBPp/AUETXlHW0DxSjE= +go.uber.org/atomic v1.11.0/go.mod h1:LUxbIzbOniOlMKjJjyPfpl4v+PKK2cNJn91OQbhoJI0= +golang.org/x/net v0.9.0 h1:aWJ/m6xSmxWBx+V0XRHTlrYrPG56jKsLdTFmsSsCzOM= +golang.org/x/net v0.9.0/go.mod h1:d48xBJpPfHeWQsugry2m+kC02ZBRGRgulfHnEXEuWns= +golang.org/x/sync v0.1.0 h1:wsuoTGHzEhffawBOhz5CYhcrV4IdKZbEyZjBMuTp12o= +golang.org/x/sync v0.1.0/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sys v0.7.0 h1:3jlCCIQZPdOYu1h8BkNvLz8Kgwtae2cagcG/VamtZRU= +golang.org/x/sys v0.7.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/text v0.9.0 h1:2sjJmO8cDvYveuX97RDLsxlyUxLl+GHoLxBiRdHllBE= +golang.org/x/text v0.9.0/go.mod h1:e1OnstbJyHTd6l/uOt8jFFHp6TRDWZR/bV3emEE/zU8= +golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= +google.golang.org/genproto/googleapis/rpc v0.0.0-20230525234030-28d5490b6b19 h1:0nDDozoAU19Qb2HwhXadU8OcsiO/09cnTqhUtq2MEOM= +google.golang.org/genproto/googleapis/rpc v0.0.0-20230525234030-28d5490b6b19/go.mod h1:66JfowdXAEgad5O9NnYcsNPLCPZJD++2L9X0PCMODrA= +google.golang.org/grpc 
v1.57.0 h1:kfzNeI/klCGD2YPMUlaGNT3pxvYfga7smW3Vth8Zsiw=
+google.golang.org/grpc v1.57.0/go.mod h1:Sd+9RMTACXwmub0zcNY2c4arhtrbBYD1AUHI/dt16Mo=
+google.golang.org/protobuf v1.26.0-rc.1/go.mod h1:jlhhOSvTdKEhbULTjvd4ARK9grFBp09yW+WbY/TyQbw=
+google.golang.org/protobuf v1.26.0/go.mod h1:9q0QmTI4eRPtz6boOQmLYwt+qCgq0jsYwAQnmE0givc=
+google.golang.org/protobuf v1.31.0 h1:g0LDEJHgrBl9N9r17Ru3sqWhkIx2NB67okBHPwC7hs8=
+google.golang.org/protobuf v1.31.0/go.mod h1:HV8QOd/L58Z+nl8r43ehVNZIU/HEI6OcFqwMG9pJV4I=
+gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
diff --git a/pkg/sessionreducer/examples/counter/main.go b/pkg/sessionreducer/examples/counter/main.go
new file mode 100644
index 00000000..8eafdffb
--- /dev/null
+++ b/pkg/sessionreducer/examples/counter/main.go
@@ -0,0 +1,54 @@
+package main
+
+import (
+	"context"
+	"fmt"
+	"log"
+	"strconv"
+
+	"go.uber.org/atomic"
+
+	"github.com/numaproj/numaflow-go/pkg/sessionreducer"
+)
+
+// Counter is a simple session reducer which counts the number of events in a session.
+type Counter struct {
+	count *atomic.Int32
+}
+
+// SessionReduce increments the count once for every datum read from input and, after input is
+// closed, writes a single message with the count in decimal text and the given keys to outputCh.
+func (c *Counter) SessionReduce(ctx context.Context, keys []string, input <-chan sessionreducer.Datum, outputCh chan<- sessionreducer.Message) {
+	for range input {
+		c.count.Inc()
+	}
+	outputCh <- sessionreducer.NewMessage([]byte(fmt.Sprintf("%d", c.count.Load()))).WithKeys(keys)
+}
+
+// Accumulator returns the current count as decimal text.
+func (c *Counter) Accumulator(ctx context.Context) []byte {
+	return []byte(strconv.Itoa(int(c.count.Load())))
+}
+
+// MergeAccumulator parses accumulator as a decimal integer and adds it to the count.
+// An accumulator that cannot be parsed is logged and otherwise ignored.
+func (c *Counter) MergeAccumulator(ctx context.Context, accumulator []byte) {
+	val, err := strconv.Atoi(string(accumulator))
+	if err != nil {
+		log.Println("unable to convert the accumulator value to int: ", err.Error())
+		return
+	}
+	c.count.Add(int32(val))
+}
+
+// NewSessionCounter creates a Counter whose count starts at zero.
+func NewSessionCounter() sessionreducer.SessionReducer {
+	return &Counter{
+		count: atomic.NewInt32(0),
+	}
+}
+
+// SessionCounterCreator is the creator for the session reducer.
+type SessionCounterCreator struct{} + +func (s *SessionCounterCreator) Create() sessionreducer.SessionReducer { + return NewSessionCounter() +} + +func main() { + sessionreducer.NewServer(&SessionCounterCreator{}).Start(context.Background()) +} diff --git a/pkg/sessionreducer/interface.go b/pkg/sessionreducer/interface.go new file mode 100644 index 00000000..905ae380 --- /dev/null +++ b/pkg/sessionreducer/interface.go @@ -0,0 +1,31 @@ +package sessionreducer + +import ( + "context" + "time" +) + +// Datum contains methods to get the payload information. +type Datum interface { + Value() []byte + EventTime() time.Time + Watermark() time.Time +} + +// SessionReducer is the interface which can be used to implement a session reduce operation. +type SessionReducer interface { + // SessionReduce applies a session reduce function to a request stream and streams the results. + SessionReduce(ctx context.Context, keys []string, inputCh <-chan Datum, outputCh chan<- Message) + // Accumulator returns the accumulator for the session reducer, will be invoked when this session is merged + // with another session. + Accumulator(ctx context.Context) []byte + // MergeAccumulator merges the accumulator for the session reducer, will be invoked when another session is merged + // with this session. + MergeAccumulator(ctx context.Context, accumulator []byte) +} + +// SessionReducerCreator is the interface which can be used to create a session reducer. +type SessionReducerCreator interface { + // Create creates a session reducer, will be invoked once for every keyed window. 
+ Create() SessionReducer +} diff --git a/pkg/sessionreducer/message.go b/pkg/sessionreducer/message.go new file mode 100644 index 00000000..8fcfe026 --- /dev/null +++ b/pkg/sessionreducer/message.go @@ -0,0 +1,52 @@ +package sessionreducer + +import "fmt" + +var ( + DROP = fmt.Sprintf("%U__DROP__", '\\') // U+005C__DROP__ +) + +// Message is used to wrap the data return by SessionReduce functions +type Message struct { + value []byte + keys []string + tags []string +} + +// NewMessage creates a Message with value +func NewMessage(value []byte) Message { + return Message{value: value} +} + +// MessageToDrop creates a Message to be dropped +func MessageToDrop() Message { + return Message{value: []byte{}, tags: []string{DROP}} +} + +// WithKeys is used to assign the keys to the message +func (m Message) WithKeys(keys []string) Message { + m.keys = keys + return m +} + +// WithTags is used to assign the tags to the message +// tags will be used for conditional forwarding +func (m Message) WithTags(tags []string) Message { + m.tags = tags + return m +} + +// Keys returns message keys +func (m Message) Keys() []string { + return m.keys +} + +// Value returns message value +func (m Message) Value() []byte { + return m.value +} + +// Tags returns message tags +func (m Message) Tags() []string { + return m.tags +} diff --git a/pkg/sessionreducer/options.go b/pkg/sessionreducer/options.go new file mode 100644 index 00000000..ba7af1e3 --- /dev/null +++ b/pkg/sessionreducer/options.go @@ -0,0 +1,43 @@ +package sessionreducer + +import ( + "github.com/numaproj/numaflow-go/pkg/info" +) + +type options struct { + sockAddr string + maxMessageSize int + serverInfoFilePath string +} + +// Option is the interface to apply options. 
+type Option func(*options)
+
+func defaultOptions() *options {
+	return &options{
+		sockAddr:           address,
+		maxMessageSize:     defaultMaxMessageSize,
+		serverInfoFilePath: info.ServerInfoFilePath,
+	}
+}
+
+// WithMaxMessageSize sets the server max receive message size and the server max send message size to the given size.
+func WithMaxMessageSize(size int) Option {
+	return func(opts *options) {
+		opts.maxMessageSize = size
+	}
+}
+
+// WithSockAddr starts the server with the given sock addr instead of the default one. This is mainly used for testing purposes.
+func WithSockAddr(addr string) Option {
+	return func(opts *options) {
+		opts.sockAddr = addr
+	}
+}
+
+// WithServerInfoFilePath sets the server info file path to the given path.
+func WithServerInfoFilePath(f string) Option {
+	return func(opts *options) {
+		opts.serverInfoFilePath = f
+	}
+}
diff --git a/pkg/sessionreducer/options_test.go b/pkg/sessionreducer/options_test.go
new file mode 100644
index 00000000..9dfbcfbf
--- /dev/null
+++ b/pkg/sessionreducer/options_test.go
@@ -0,0 +1,18 @@
+package sessionreducer
+
+import (
+	"testing"
+
+	"github.com/stretchr/testify/assert"
+)
+
+func TestWithMaxMessageSize(t *testing.T) {
+	var (
+		size = 1024 * 1024 * 10
+		opts = &options{
+			maxMessageSize: defaultMaxMessageSize,
+		}
+	)
+	WithMaxMessageSize(1024 * 1024 * 10)(opts)
+	assert.Equal(t, size, opts.maxMessageSize)
+}
diff --git a/pkg/sessionreducer/server.go b/pkg/sessionreducer/server.go
new file mode 100644
index 00000000..7bfaf664
--- /dev/null
+++ b/pkg/sessionreducer/server.go
@@ -0,0 +1,56 @@
+package sessionreducer
+
+import (
+	"context"
+	"fmt"
+	"os/signal"
+	"syscall"
+
+	"github.com/numaproj/numaflow-go/pkg"
+	sessionreducepb "github.com/numaproj/numaflow-go/pkg/apis/proto/sessionreduce/v1"
+	"github.com/numaproj/numaflow-go/pkg/shared"
+)
+
+// server is a session reduce gRPC server; it holds the service to register and the resolved options.
+type server struct {
+	svc  *Service
+	opts *options
+}
+
+// NewServer creates a new session reduce server for the given creator, applying inputOptions on top of the defaults.
+func NewServer(r SessionReducerCreator, inputOptions ...Option) numaflow.Server {
+	opts := defaultOptions()
+	for _, inputOption := range inputOptions {
+		inputOption(opts)
+	}
+	s := new(server)
+	s.svc = new(Service)
+	s.svc.creatorHandle = r
+	s.opts = opts
+	return s
+}
+
+// Start starts the session reduce gRPC server; the context handed to the server is also cancelled on SIGINT or SIGTERM.
+func (r *server) Start(ctx context.Context) error {
+	ctxWithSignal, stop := signal.NotifyContext(ctx, syscall.SIGINT, syscall.SIGTERM)
+	defer stop()
+
+	// write server info to the file
+	// start listening on unix domain socket
+	lis, err := shared.PrepareServer(r.opts.sockAddr, r.opts.serverInfoFilePath)
+	if err != nil {
+		return fmt.Errorf("failed to execute net.Listen(%q, %q): %w", uds, r.opts.sockAddr, err)
+	}
+	// close the listener
+	defer func() { _ = lis.Close() }()
+
+	// create a grpc server
+	grpcServer := shared.CreateGRPCServer(r.opts.maxMessageSize)
+	defer grpcServer.GracefulStop()
+
+	// register the sessionReduce service
+	sessionreducepb.RegisterSessionReduceServer(grpcServer, r.svc)
+
+	// start the grpc server
+	return shared.StartGRPCServer(ctxWithSignal, grpcServer, lis)
+}
diff --git a/pkg/sessionreducer/server_test.go b/pkg/sessionreducer/server_test.go
new file mode 100644
index 00000000..c020b6e2
--- /dev/null
+++ b/pkg/sessionreducer/server_test.go
@@ -0,0 +1,28 @@
+package sessionreducer
+
+import (
+	"context"
+	"os"
+	"testing"
+	"time"
+
+	"github.com/stretchr/testify/assert"
+)
+
+func TestReduceServer_Start(t *testing.T) {
+	socketFile, _ := os.CreateTemp("/tmp", "numaflow-test.sock")
+	defer func() {
+		_ = os.RemoveAll(socketFile.Name())
+	}()
+
+	serverInfoFile, _ := os.CreateTemp("/tmp", "numaflow-test-info")
+	defer func() {
+		_ = os.RemoveAll(serverInfoFile.Name())
+	}()
+
+	// note: using actual uds connection
+	ctx, cancel := context.WithTimeout(context.Background(), 6*time.Second)
+	defer cancel()
+	err := NewServer(&SessionSumCreator{}, WithSockAddr(socketFile.Name()), WithServerInfoFilePath(serverInfoFile.Name())).Start(ctx)
+	assert.NoError(t, err)
+}
diff --git a/pkg/sessionreducer/service.go b/pkg/sessionreducer/service.go
new file mode 100644
index 00000000..b3ddd08b
--- /dev/null
+++ b/pkg/sessionreducer/service.go
@@ -0,0 +1,113 @@
+package sessionreducer
+
+import (
+	"context"
+	"io"
+
+	"golang.org/x/sync/errgroup"
+	"google.golang.org/grpc/codes"
+	"google.golang.org/grpc/status"
+	"google.golang.org/protobuf/types/known/emptypb"
+
+	sessionreducepb "github.com/numaproj/numaflow-go/pkg/apis/proto/sessionreduce/v1"
+)
+
+const (
+	uds                   = "unix"
+	defaultMaxMessageSize = 1024 * 1024 * 64
+	address               = "/var/run/numaflow/sessionreduce.sock"
+	delimiter             = ":"
+)
+
+// Service implements the proto gen server interface and contains the sessionreduce operation handler.
+type Service struct {
+	sessionreducepb.UnimplementedSessionReduceServer
+	creatorHandle SessionReducerCreator
+}
+
+// IsReady returns true to indicate the gRPC connection is ready.
+func (fs *Service) IsReady(context.Context, *emptypb.Empty) (*sessionreducepb.ReadyResponse, error) {
+	return &sessionreducepb.ReadyResponse{Ready: true}, nil
+}
+
+// SessionReduceFn applies a session reduce function to a request stream and streams the results.
+// Requests are dispatched to the task manager according to their window operation (OPEN, CLOSE,
+// APPEND, MERGE, EXPAND) until the client closes the stream; the call then waits for the tasks
+// and for the goroutine that sends responses. Failures are returned as gRPC status errors:
+// InvalidArgument for a request without a window operation, Internal otherwise.
+func (fs *Service) SessionReduceFn(stream sessionreducepb.SessionReduce_SessionReduceFnServer) error {
+	ctx := stream.Context()
+	taskManager := newReduceTaskManager(fs.creatorHandle)
+
+	// err group for the go routine which reads from the output channel and sends to the stream
+	var g errgroup.Group
+	g.Go(func() error {
+		for output := range taskManager.OutputChannel() {
+			if err := stream.Send(output); err != nil {
+				return err
+			}
+		}
+		return nil
+	})
+
+	// NOTE(review): the early returns below do not stop the goroutine above; whether the output
+	// channel gets closed on those paths is not visible from this function - confirm.
+	for {
+		d, recvErr := stream.Recv()
+
+		// if the stream is closed, break and wait for the tasks to return
+		if recvErr == io.EOF {
+			break
+		}
+
+		// status.Error is used instead of status.Errorf so that a '%' inside an error text is
+		// never interpreted as a format verb
+		if recvErr != nil {
+			return status.Error(codes.Internal, recvErr.Error())
+		}
+
+		// a request without a window operation cannot be dispatched; reject it instead of
+		// dereferencing a nil pointer in the switch below
+		if d.Operation == nil {
+			return status.Error(codes.InvalidArgument, "session reduce request has no window operation")
+		}
+
+		// invoke the appropriate task manager method based on the operation
+		switch d.Operation.Event {
+		case sessionreducepb.SessionReduceRequest_WindowOperation_OPEN:
+			// create a new task and start the session reduce operation
+			// also append the datum to the task
+			if err := taskManager.CreateTask(ctx, d); err != nil {
+				return status.Error(codes.Internal, err.Error())
+			}
+		case sessionreducepb.SessionReduceRequest_WindowOperation_CLOSE:
+			// close the task
+			taskManager.CloseTask(d)
+		case sessionreducepb.SessionReduceRequest_WindowOperation_APPEND:
+			// append the datum to the task
+			if err := taskManager.AppendToTask(ctx, d); err != nil {
+				return status.Error(codes.Internal, err.Error())
+			}
+		case sessionreducepb.SessionReduceRequest_WindowOperation_MERGE:
+			// merge the tasks
+			if err := taskManager.MergeTasks(ctx, d); err != nil {
+				return status.Error(codes.Internal, err.Error())
+			}
+		case sessionreducepb.SessionReduceRequest_WindowOperation_EXPAND:
+			// expand the task
+			if err := taskManager.ExpandTask(d); err != nil {
+				return status.Error(codes.Internal, err.Error())
+			}
+		}
+	}
+
+	// wait for all the tasks to return
+	taskManager.WaitAll()
+
+	// wait for the go routine which reads from the output channel and sends to the stream to return
+	if err := g.Wait(); err != nil {
+		return status.Error(codes.Internal, err.Error())
+	}
+
+	return nil
+}
diff --git a/pkg/sessionreducer/service_test.go b/pkg/sessionreducer/service_test.go
new file mode 100644
index 00000000..bcce002d
--- /dev/null
+++ b/pkg/sessionreducer/service_test.go
@@ -0,0 +1,830 @@
+package sessionreducer
+
+import (
+	"context"
+	"io"
+	"log"
+	"reflect"
+	"sort"
+	"strconv"
+	"sync"
+	"testing"
+	"time"
+
+	"go.uber.org/atomic"
+	"google.golang.org/grpc"
+	"google.golang.org/protobuf/types/known/timestamppb"
+
+	sessionreducepb "github.com/numaproj/numaflow-go/pkg/apis/proto/sessionreduce/v1"
+)
+
+type SessionReduceFnServerTest struct {
+	ctx      context.Context
+	inputCh  chan *sessionreducepb.SessionReduceRequest
+	outputCh chan *sessionreducepb.SessionReduceResponse
+	grpc.ServerStream
+}
+
+func NewReduceFnServerTest(ctx context.Context,
+	inputCh chan *sessionreducepb.SessionReduceRequest,
+	outputCh chan *sessionreducepb.SessionReduceResponse) *SessionReduceFnServerTest {
+	return &SessionReduceFnServerTest{
+		ctx:      ctx,
+		inputCh:  inputCh,
+		outputCh: outputCh,
+	}
+}
+
+func (u *SessionReduceFnServerTest) Send(list *sessionreducepb.SessionReduceResponse) error {
+	u.outputCh <- list
+	return nil
+}
+
+func (u *SessionReduceFnServerTest) Recv() (*sessionreducepb.SessionReduceRequest, error) {
+	val, ok := <-u.inputCh
+	if !ok {
+		return val, io.EOF
+	}
+	return val, nil
+}
+
+func (u *SessionReduceFnServerTest) Context() context.Context {
+	return u.ctx
+}
+
+type SessionSum struct {
+	sum *atomic.Int32
+}
+
+func (s *SessionSum) SessionReduce(ctx context.Context, keys []string, inputCh <-chan Datum, outputCh chan<- Message) {
+	for val := range inputCh {
+		msgVal, _ := strconv.Atoi(string(val.Value()))
+		s.sum.Add(int32(msgVal))
+	}
+	outputCh <- NewMessage([]byte(strconv.Itoa(int(s.sum.Load())))).WithKeys([]string{keys[0] + "_test"})
+}
+
+func (s *SessionSum) Accumulator(ctx context.Context) []byte {
+	return []byte(strconv.Itoa(int(s.sum.Load())))
+}
+
+func (s *SessionSum) MergeAccumulator(ctx context.Context, accumulator []byte) {
+	val, err := strconv.Atoi(string(accumulator))
+	if err != nil {
+		log.Println("unable to convert the accumulator value to int: ", err.Error())
+		return
+	}
+	s.sum.Add(int32(val))
+}
+
+type SessionSumCreator struct {
+}
+
+func (s *SessionSumCreator) Create() SessionReducer {
+	return NewSessionSum()
+}
+
+func NewSessionSum() SessionReducer {
+	return &SessionSum{
+		sum: atomic.NewInt32(0),
+	}
+}
+
+func TestService_SessionReduceFn(t *testing.T) {
+
+	tests := []struct {
+		name        string
+		handler     SessionReducerCreator
+		input       []*sessionreducepb.SessionReduceRequest
+		expected    []*sessionreducepb.SessionReduceResponse
+		expectedErr bool
+	}{
+		{
+			name:    "open_append_close",
+			handler: &SessionSumCreator{},
+			input: []*sessionreducepb.SessionReduceRequest{
+				{
+					Payload: &sessionreducepb.SessionReduceRequest_Payload{
+						Keys:      []string{"client"},
+						Value:     []byte(strconv.Itoa(10)),
+						EventTime: timestamppb.New(time.Time{}),
+						Watermark: timestamppb.New(time.Time{}),
+					},
+					Operation: &sessionreducepb.SessionReduceRequest_WindowOperation{
+						Event: sessionreducepb.SessionReduceRequest_WindowOperation_OPEN,
+						KeyedWindows: []*sessionreducepb.KeyedWindow{
+							{
+								Start: timestamppb.New(time.UnixMilli(60000)),
+								End:   timestamppb.New(time.UnixMilli(120000)),
+								Slot:  "slot-0",
+								Keys:  []string{"client"},
+							},
+						},
+					},
+				},
+				{
+					Payload: &sessionreducepb.SessionReduceRequest_Payload{
+						Keys:      []string{"client"},
+						Value:     []byte(strconv.Itoa(20)),
+						EventTime: timestamppb.New(time.Time{}),
+						Watermark: timestamppb.New(time.Time{}),
+					},
+					Operation: &sessionreducepb.SessionReduceRequest_WindowOperation{
+						Event: sessionreducepb.SessionReduceRequest_WindowOperation_APPEND,
+						KeyedWindows: 
[]*sessionreducepb.KeyedWindow{ + { + Start: timestamppb.New(time.UnixMilli(60000)), + End: timestamppb.New(time.UnixMilli(120000)), + Slot: "slot-0", + Keys: []string{"client"}, + }, + }, + }, + }, + { + Payload: &sessionreducepb.SessionReduceRequest_Payload{ + Keys: []string{"client"}, + Value: []byte(strconv.Itoa(30)), + EventTime: timestamppb.New(time.Time{}), + Watermark: timestamppb.New(time.Time{}), + }, + Operation: &sessionreducepb.SessionReduceRequest_WindowOperation{ + Event: sessionreducepb.SessionReduceRequest_WindowOperation_APPEND, + KeyedWindows: []*sessionreducepb.KeyedWindow{ + { + Start: timestamppb.New(time.UnixMilli(60000)), + End: timestamppb.New(time.UnixMilli(120000)), + Slot: "slot-0", + Keys: []string{"client"}, + }, + }, + }, + }, + { + Operation: &sessionreducepb.SessionReduceRequest_WindowOperation{ + Event: sessionreducepb.SessionReduceRequest_WindowOperation_APPEND, + KeyedWindows: []*sessionreducepb.KeyedWindow{ + { + Start: timestamppb.New(time.UnixMilli(60000)), + End: timestamppb.New(time.UnixMilli(120000)), + Slot: "slot-0", + Keys: []string{"client"}, + }, + }, + }, + }, + { + Operation: &sessionreducepb.SessionReduceRequest_WindowOperation{ + Event: sessionreducepb.SessionReduceRequest_WindowOperation_CLOSE, + KeyedWindows: []*sessionreducepb.KeyedWindow{ + { + Start: timestamppb.New(time.UnixMilli(60000)), + End: timestamppb.New(time.UnixMilli(120000)), + Slot: "slot-0", + Keys: []string{"client"}, + }, + }, + }, + }, + }, + expected: []*sessionreducepb.SessionReduceResponse{ + { + Result: &sessionreducepb.SessionReduceResponse_Result{ + Keys: []string{"client_test"}, + Value: []byte(strconv.Itoa(60)), + }, + KeyedWindow: &sessionreducepb.KeyedWindow{ + Start: timestamppb.New(time.UnixMilli(60000)), + End: timestamppb.New(time.UnixMilli(120000)), + Slot: "slot-0", + Keys: []string{"client"}, + }, + EOF: false, + }, + }, + expectedErr: false, + }, + { + name: "open_expand_close", + handler: &SessionSumCreator{}, + input: 
[]*sessionreducepb.SessionReduceRequest{ + { + Payload: &sessionreducepb.SessionReduceRequest_Payload{ + Keys: []string{"client1"}, + Value: []byte(strconv.Itoa(10)), + EventTime: timestamppb.New(time.Time{}), + Watermark: timestamppb.New(time.Time{}), + }, + Operation: &sessionreducepb.SessionReduceRequest_WindowOperation{ + Event: sessionreducepb.SessionReduceRequest_WindowOperation_OPEN, + KeyedWindows: []*sessionreducepb.KeyedWindow{ + { + Start: timestamppb.New(time.UnixMilli(60000)), + End: timestamppb.New(time.UnixMilli(70000)), + Slot: "slot-0", + Keys: []string{"client1"}, + }, + }, + }, + }, + { + Payload: &sessionreducepb.SessionReduceRequest_Payload{ + Keys: []string{"client2"}, + Value: []byte(strconv.Itoa(20)), + EventTime: timestamppb.New(time.Time{}), + Watermark: timestamppb.New(time.Time{}), + }, + Operation: &sessionreducepb.SessionReduceRequest_WindowOperation{ + Event: sessionreducepb.SessionReduceRequest_WindowOperation_OPEN, + KeyedWindows: []*sessionreducepb.KeyedWindow{ + { + Start: timestamppb.New(time.UnixMilli(60000)), + End: timestamppb.New(time.UnixMilli(70000)), + Slot: "slot-0", + Keys: []string{"client2"}, + }, + }, + }, + }, + { + Payload: &sessionreducepb.SessionReduceRequest_Payload{ + Keys: []string{"client1"}, + Value: []byte(strconv.Itoa(10)), + EventTime: timestamppb.New(time.Time{}), + Watermark: timestamppb.New(time.Time{}), + }, + Operation: &sessionreducepb.SessionReduceRequest_WindowOperation{ + Event: sessionreducepb.SessionReduceRequest_WindowOperation_EXPAND, + KeyedWindows: []*sessionreducepb.KeyedWindow{ + { + Start: timestamppb.New(time.UnixMilli(60000)), + End: timestamppb.New(time.UnixMilli(70000)), + Slot: "slot-0", + Keys: []string{"client1"}, + }, + { + Start: timestamppb.New(time.UnixMilli(60000)), + End: timestamppb.New(time.UnixMilli(75000)), + Slot: "slot-0", + Keys: []string{"client1"}, + }, + }, + }, + }, + { + Payload: &sessionreducepb.SessionReduceRequest_Payload{ + Keys: []string{"client2"}, + Value: 
[]byte(strconv.Itoa(20)), + EventTime: timestamppb.New(time.Time{}), + Watermark: timestamppb.New(time.Time{}), + }, + Operation: &sessionreducepb.SessionReduceRequest_WindowOperation{ + Event: sessionreducepb.SessionReduceRequest_WindowOperation_EXPAND, + KeyedWindows: []*sessionreducepb.KeyedWindow{ + { + Start: timestamppb.New(time.UnixMilli(60000)), + End: timestamppb.New(time.UnixMilli(70000)), + Slot: "slot-0", + Keys: []string{"client2"}, + }, + { + Start: timestamppb.New(time.UnixMilli(60000)), + End: timestamppb.New(time.UnixMilli(79000)), + Slot: "slot-0", + Keys: []string{"client2"}, + }, + }, + }, + }, + { + Operation: &sessionreducepb.SessionReduceRequest_WindowOperation{ + Event: sessionreducepb.SessionReduceRequest_WindowOperation_CLOSE, + KeyedWindows: []*sessionreducepb.KeyedWindow{ + { + Start: timestamppb.New(time.UnixMilli(60000)), + End: timestamppb.New(time.UnixMilli(75000)), + Slot: "slot-0", + Keys: []string{"client1"}, + }, + { + Start: timestamppb.New(time.UnixMilli(60000)), + End: timestamppb.New(time.UnixMilli(79000)), + Slot: "slot-0", + Keys: []string{"client2"}, + }, + }, + }, + }, + }, + expected: []*sessionreducepb.SessionReduceResponse{ + { + Result: &sessionreducepb.SessionReduceResponse_Result{ + Keys: []string{"client1_test"}, + Value: []byte(strconv.Itoa(20)), + }, + KeyedWindow: &sessionreducepb.KeyedWindow{ + Start: timestamppb.New(time.UnixMilli(60000)), + End: timestamppb.New(time.UnixMilli(75000)), + Slot: "slot-0", + Keys: []string{"client1"}, + }, + EOF: false, + }, + { + Result: &sessionreducepb.SessionReduceResponse_Result{ + Keys: []string{"client2_test"}, + Value: []byte(strconv.Itoa(40)), + }, + KeyedWindow: &sessionreducepb.KeyedWindow{ + Start: timestamppb.New(time.UnixMilli(60000)), + End: timestamppb.New(time.UnixMilli(79000)), + Slot: "slot-0", + Keys: []string{"client2"}, + }, + EOF: false, + }, + }, + expectedErr: false, + }, + { + name: "open_merge_close", + handler: &SessionSumCreator{}, + input: 
[]*sessionreducepb.SessionReduceRequest{ + { + Payload: &sessionreducepb.SessionReduceRequest_Payload{ + Keys: []string{"client1"}, + Value: []byte(strconv.Itoa(10)), + EventTime: timestamppb.New(time.Time{}), + Watermark: timestamppb.New(time.Time{}), + }, + Operation: &sessionreducepb.SessionReduceRequest_WindowOperation{ + Event: sessionreducepb.SessionReduceRequest_WindowOperation_OPEN, + KeyedWindows: []*sessionreducepb.KeyedWindow{ + { + Start: timestamppb.New(time.UnixMilli(60000)), + End: timestamppb.New(time.UnixMilli(70000)), + Slot: "slot-0", + Keys: []string{"client1"}, + }, + }, + }, + }, + { + Payload: &sessionreducepb.SessionReduceRequest_Payload{ + Keys: []string{"client2"}, + Value: []byte(strconv.Itoa(20)), + EventTime: timestamppb.New(time.Time{}), + Watermark: timestamppb.New(time.Time{}), + }, + Operation: &sessionreducepb.SessionReduceRequest_WindowOperation{ + Event: sessionreducepb.SessionReduceRequest_WindowOperation_OPEN, + KeyedWindows: []*sessionreducepb.KeyedWindow{ + { + Start: timestamppb.New(time.UnixMilli(60000)), + End: timestamppb.New(time.UnixMilli(70000)), + Slot: "slot-0", + Keys: []string{"client2"}, + }, + }, + }, + }, + { + Payload: &sessionreducepb.SessionReduceRequest_Payload{ + Keys: []string{"client1"}, + Value: []byte(strconv.Itoa(10)), + EventTime: timestamppb.New(time.Time{}), + Watermark: timestamppb.New(time.Time{}), + }, + Operation: &sessionreducepb.SessionReduceRequest_WindowOperation{ + Event: sessionreducepb.SessionReduceRequest_WindowOperation_OPEN, + KeyedWindows: []*sessionreducepb.KeyedWindow{ + { + Start: timestamppb.New(time.UnixMilli(75000)), + End: timestamppb.New(time.UnixMilli(85000)), + Slot: "slot-0", + Keys: []string{"client1"}, + }, + }, + }, + }, + { + Payload: &sessionreducepb.SessionReduceRequest_Payload{ + Keys: []string{"client2"}, + Value: []byte(strconv.Itoa(20)), + EventTime: timestamppb.New(time.Time{}), + Watermark: timestamppb.New(time.Time{}), + }, + Operation: 
&sessionreducepb.SessionReduceRequest_WindowOperation{ + Event: sessionreducepb.SessionReduceRequest_WindowOperation_OPEN, + KeyedWindows: []*sessionreducepb.KeyedWindow{ + { + Start: timestamppb.New(time.UnixMilli(78000)), + End: timestamppb.New(time.UnixMilli(88000)), + Slot: "slot-0", + Keys: []string{"client2"}, + }, + }, + }, + }, + { + Operation: &sessionreducepb.SessionReduceRequest_WindowOperation{ + Event: sessionreducepb.SessionReduceRequest_WindowOperation_MERGE, + KeyedWindows: []*sessionreducepb.KeyedWindow{ + { + Start: timestamppb.New(time.UnixMilli(60000)), + End: timestamppb.New(time.UnixMilli(70000)), + Slot: "slot-0", + Keys: []string{"client1"}, + }, + { + Start: timestamppb.New(time.UnixMilli(75000)), + End: timestamppb.New(time.UnixMilli(85000)), + Slot: "slot-0", + Keys: []string{"client1"}, + }, + }, + }, + }, + { + Operation: &sessionreducepb.SessionReduceRequest_WindowOperation{ + Event: sessionreducepb.SessionReduceRequest_WindowOperation_MERGE, + KeyedWindows: []*sessionreducepb.KeyedWindow{ + { + Start: timestamppb.New(time.UnixMilli(60000)), + End: timestamppb.New(time.UnixMilli(70000)), + Slot: "slot-0", + Keys: []string{"client2"}, + }, + { + Start: timestamppb.New(time.UnixMilli(78000)), + End: timestamppb.New(time.UnixMilli(88000)), + Slot: "slot-0", + Keys: []string{"client2"}, + }, + }, + }, + }, + { + Operation: &sessionreducepb.SessionReduceRequest_WindowOperation{ + Event: sessionreducepb.SessionReduceRequest_WindowOperation_CLOSE, + KeyedWindows: []*sessionreducepb.KeyedWindow{ + { + Start: timestamppb.New(time.UnixMilli(60000)), + End: timestamppb.New(time.UnixMilli(85000)), + Slot: "slot-0", + Keys: []string{"client1"}, + }, + { + Start: timestamppb.New(time.UnixMilli(60000)), + End: timestamppb.New(time.UnixMilli(88000)), + Slot: "slot-0", + Keys: []string{"client2"}, + }, + }, + }, + }, + }, + expected: []*sessionreducepb.SessionReduceResponse{ + { + Result: &sessionreducepb.SessionReduceResponse_Result{ + Keys: 
[]string{"client1_test"}, + Value: []byte(strconv.Itoa(20)), + }, + KeyedWindow: &sessionreducepb.KeyedWindow{ + Start: timestamppb.New(time.UnixMilli(60000)), + End: timestamppb.New(time.UnixMilli(85000)), + Slot: "slot-0", + Keys: []string{"client1"}, + }, + EOF: false, + }, + { + Result: &sessionreducepb.SessionReduceResponse_Result{ + Keys: []string{"client2_test"}, + Value: []byte(strconv.Itoa(40)), + }, + KeyedWindow: &sessionreducepb.KeyedWindow{ + Start: timestamppb.New(time.UnixMilli(60000)), + End: timestamppb.New(time.UnixMilli(88000)), + Slot: "slot-0", + Keys: []string{"client2"}, + }, + EOF: false, + }, + }, + expectedErr: false, + }, + { + name: "open_expand_append_merge_close", + handler: &SessionSumCreator{}, + input: []*sessionreducepb.SessionReduceRequest{ + { + Payload: &sessionreducepb.SessionReduceRequest_Payload{ + Keys: []string{"client1"}, + Value: []byte(strconv.Itoa(10)), + EventTime: timestamppb.New(time.Time{}), + Watermark: timestamppb.New(time.Time{}), + }, + Operation: &sessionreducepb.SessionReduceRequest_WindowOperation{ + Event: sessionreducepb.SessionReduceRequest_WindowOperation_OPEN, + KeyedWindows: []*sessionreducepb.KeyedWindow{ + { + Start: timestamppb.New(time.UnixMilli(60000)), + End: timestamppb.New(time.UnixMilli(70000)), + Slot: "slot-0", + Keys: []string{"client1"}, + }, + }, + }, + }, + { + Payload: &sessionreducepb.SessionReduceRequest_Payload{ + Keys: []string{"client2"}, + Value: []byte(strconv.Itoa(20)), + EventTime: timestamppb.New(time.Time{}), + Watermark: timestamppb.New(time.Time{}), + }, + Operation: &sessionreducepb.SessionReduceRequest_WindowOperation{ + Event: sessionreducepb.SessionReduceRequest_WindowOperation_OPEN, + KeyedWindows: []*sessionreducepb.KeyedWindow{ + { + Start: timestamppb.New(time.UnixMilli(60000)), + End: timestamppb.New(time.UnixMilli(70000)), + Slot: "slot-0", + Keys: []string{"client2"}, + }, + }, + }, + }, + { + Payload: &sessionreducepb.SessionReduceRequest_Payload{ + Keys: 
[]string{"client1"}, + Value: []byte(strconv.Itoa(10)), + EventTime: timestamppb.New(time.Time{}), + Watermark: timestamppb.New(time.Time{}), + }, + Operation: &sessionreducepb.SessionReduceRequest_WindowOperation{ + Event: sessionreducepb.SessionReduceRequest_WindowOperation_OPEN, + KeyedWindows: []*sessionreducepb.KeyedWindow{ + { + Start: timestamppb.New(time.UnixMilli(75000)), + End: timestamppb.New(time.UnixMilli(85000)), + Slot: "slot-0", + Keys: []string{"client1"}, + }, + }, + }, + }, + { + Payload: &sessionreducepb.SessionReduceRequest_Payload{ + Keys: []string{"client2"}, + Value: []byte(strconv.Itoa(20)), + EventTime: timestamppb.New(time.Time{}), + Watermark: timestamppb.New(time.Time{}), + }, + Operation: &sessionreducepb.SessionReduceRequest_WindowOperation{ + Event: sessionreducepb.SessionReduceRequest_WindowOperation_OPEN, + KeyedWindows: []*sessionreducepb.KeyedWindow{ + { + Start: timestamppb.New(time.UnixMilli(78000)), + End: timestamppb.New(time.UnixMilli(88000)), + Slot: "slot-0", + Keys: []string{"client2"}, + }, + }, + }, + }, + { + Payload: &sessionreducepb.SessionReduceRequest_Payload{ + Keys: []string{"client1"}, + Value: []byte(strconv.Itoa(10)), + EventTime: timestamppb.New(time.Time{}), + Watermark: timestamppb.New(time.Time{}), + }, + Operation: &sessionreducepb.SessionReduceRequest_WindowOperation{ + Event: sessionreducepb.SessionReduceRequest_WindowOperation_EXPAND, + KeyedWindows: []*sessionreducepb.KeyedWindow{ + { + Start: timestamppb.New(time.UnixMilli(75000)), + End: timestamppb.New(time.UnixMilli(85000)), + Slot: "slot-0", + Keys: []string{"client1"}, + }, + { + Start: timestamppb.New(time.UnixMilli(75000)), + End: timestamppb.New(time.UnixMilli(95000)), + Slot: "slot-0", + Keys: []string{"client1"}, + }, + }, + }, + }, + { + Payload: &sessionreducepb.SessionReduceRequest_Payload{ + Keys: []string{"client2"}, + Value: []byte(strconv.Itoa(20)), + EventTime: timestamppb.New(time.Time{}), + Watermark: timestamppb.New(time.Time{}), 
+ }, + Operation: &sessionreducepb.SessionReduceRequest_WindowOperation{ + Event: sessionreducepb.SessionReduceRequest_WindowOperation_EXPAND, + KeyedWindows: []*sessionreducepb.KeyedWindow{ + { + Start: timestamppb.New(time.UnixMilli(78000)), + End: timestamppb.New(time.UnixMilli(88000)), + Slot: "slot-0", + Keys: []string{"client2"}, + }, + { + Start: timestamppb.New(time.UnixMilli(78000)), + End: timestamppb.New(time.UnixMilli(98000)), + Slot: "slot-0", + Keys: []string{"client2"}, + }, + }, + }, + }, + { + Payload: &sessionreducepb.SessionReduceRequest_Payload{ + Keys: []string{"client1"}, + Value: []byte(strconv.Itoa(10)), + EventTime: timestamppb.New(time.Time{}), + Watermark: timestamppb.New(time.Time{}), + }, + Operation: &sessionreducepb.SessionReduceRequest_WindowOperation{ + Event: sessionreducepb.SessionReduceRequest_WindowOperation_APPEND, + KeyedWindows: []*sessionreducepb.KeyedWindow{ + { + Start: timestamppb.New(time.UnixMilli(75000)), + End: timestamppb.New(time.UnixMilli(95000)), + Slot: "slot-0", + Keys: []string{"client1"}, + }, + }, + }, + }, + { + Payload: &sessionreducepb.SessionReduceRequest_Payload{ + Keys: []string{"client2"}, + Value: []byte(strconv.Itoa(20)), + EventTime: timestamppb.New(time.Time{}), + Watermark: timestamppb.New(time.Time{}), + }, + Operation: &sessionreducepb.SessionReduceRequest_WindowOperation{ + Event: sessionreducepb.SessionReduceRequest_WindowOperation_APPEND, + KeyedWindows: []*sessionreducepb.KeyedWindow{ + { + Start: timestamppb.New(time.UnixMilli(78000)), + End: timestamppb.New(time.UnixMilli(98000)), + Slot: "slot-0", + Keys: []string{"client2"}, + }, + }, + }, + }, + { + Operation: &sessionreducepb.SessionReduceRequest_WindowOperation{ + Event: sessionreducepb.SessionReduceRequest_WindowOperation_MERGE, + KeyedWindows: []*sessionreducepb.KeyedWindow{ + { + Start: timestamppb.New(time.UnixMilli(60000)), + End: timestamppb.New(time.UnixMilli(70000)), + Slot: "slot-0", + Keys: []string{"client1"}, + }, + { + 
Start: timestamppb.New(time.UnixMilli(75000)), + End: timestamppb.New(time.UnixMilli(95000)), + Slot: "slot-0", + Keys: []string{"client1"}, + }, + }, + }, + }, + { + Operation: &sessionreducepb.SessionReduceRequest_WindowOperation{ + Event: sessionreducepb.SessionReduceRequest_WindowOperation_MERGE, + KeyedWindows: []*sessionreducepb.KeyedWindow{ + { + Start: timestamppb.New(time.UnixMilli(60000)), + End: timestamppb.New(time.UnixMilli(70000)), + Slot: "slot-0", + Keys: []string{"client2"}, + }, + { + Start: timestamppb.New(time.UnixMilli(78000)), + End: timestamppb.New(time.UnixMilli(98000)), + Slot: "slot-0", + Keys: []string{"client2"}, + }, + }, + }, + }, + { + Operation: &sessionreducepb.SessionReduceRequest_WindowOperation{ + Event: sessionreducepb.SessionReduceRequest_WindowOperation_CLOSE, + KeyedWindows: []*sessionreducepb.KeyedWindow{ + { + Start: timestamppb.New(time.UnixMilli(60000)), + End: timestamppb.New(time.UnixMilli(95000)), + Slot: "slot-0", + Keys: []string{"client1"}, + }, + { + Start: timestamppb.New(time.UnixMilli(60000)), + End: timestamppb.New(time.UnixMilli(98000)), + Slot: "slot-0", + Keys: []string{"client2"}, + }, + }, + }, + }, + }, + expected: []*sessionreducepb.SessionReduceResponse{ + { + Result: &sessionreducepb.SessionReduceResponse_Result{ + Keys: []string{"client1_test"}, + Value: []byte(strconv.Itoa(40)), + }, + KeyedWindow: &sessionreducepb.KeyedWindow{ + Start: timestamppb.New(time.UnixMilli(60000)), + End: timestamppb.New(time.UnixMilli(95000)), + Slot: "slot-0", + Keys: []string{"client1"}, + }, + EOF: false, + }, + { + Result: &sessionreducepb.SessionReduceResponse_Result{ + Keys: []string{"client2_test"}, + Value: []byte(strconv.Itoa(80)), + }, + KeyedWindow: &sessionreducepb.KeyedWindow{ + Start: timestamppb.New(time.UnixMilli(60000)), + End: timestamppb.New(time.UnixMilli(98000)), + Slot: "slot-0", + Keys: []string{"client2"}, + }, + EOF: false, + }, + }, + expectedErr: false, + }, + } + for _, tt := range tests { + 
t.Run(tt.name, func(t *testing.T) { + fs := &Service{ + creatorHandle: tt.handler, + } + // here's a trick for testing: + // because we are not using gRPC, we directly set a new incoming ctx + // instead of the regular outgoing context in the real gRPC connection. + + inputCh := make(chan *sessionreducepb.SessionReduceRequest) + outputCh := make(chan *sessionreducepb.SessionReduceResponse) + result := make([]*sessionreducepb.SessionReduceResponse, 0) + + udfReduceFnStream := NewReduceFnServerTest(context.Background(), inputCh, outputCh) + + var wg sync.WaitGroup + var err error + + wg.Add(1) + go func() { + defer wg.Done() + err = fs.SessionReduceFn(udfReduceFnStream) + close(outputCh) + }() + + wg.Add(1) + go func() { + defer wg.Done() + for msg := range outputCh { + if !msg.EOF { + result = append(result, msg) + } + } + }() + + for _, val := range tt.input { + udfReduceFnStream.inputCh <- val + } + close(udfReduceFnStream.inputCh) + wg.Wait() + + if (err != nil) != tt.expectedErr { + t.Errorf("ReduceFn() error = %v, wantErr %v", err, tt.expectedErr) + return + } + + //sort and compare, since order of the output doesn't matter + sort.Slice(result, func(i, j int) bool { + return string(result[i].Result.Value) < string(result[j].Result.Value) + }) + + if !reflect.DeepEqual(result, tt.expected) { + t.Errorf("SessionReduceFn() got = %v, want %v", result, tt.expected) + } + }) + } +} diff --git a/pkg/sessionreducer/task_manager.go b/pkg/sessionreducer/task_manager.go new file mode 100644 index 00000000..e40df8b6 --- /dev/null +++ b/pkg/sessionreducer/task_manager.go @@ -0,0 +1,306 @@ +package sessionreducer + +import ( + "context" + "fmt" + "strings" + "sync" + + "go.uber.org/atomic" + + v1 "github.com/numaproj/numaflow-go/pkg/apis/proto/sessionreduce/v1" +) + +// sessionReduceTask represents a task for a performing session reduce operation. 
+type sessionReduceTask struct { + keyedWindow *v1.KeyedWindow + sessionReducer SessionReducer + inputCh chan Datum + outputCh chan Message + doneCh chan struct{} + merged *atomic.Bool +} + +// buildSessionReduceResponse builds the session reduce response from the messages. +func (rt *sessionReduceTask) buildSessionReduceResponse(message Message) *v1.SessionReduceResponse { + + response := &v1.SessionReduceResponse{ + Result: &v1.SessionReduceResponse_Result{ + Keys: message.Keys(), + Value: message.Value(), + Tags: message.Tags(), + }, + KeyedWindow: rt.keyedWindow, + } + + return response +} + +// buildEOFResponse builds the EOF response for the session reduce task. +func (rt *sessionReduceTask) buildEOFResponse() *v1.SessionReduceResponse { + response := &v1.SessionReduceResponse{ + KeyedWindow: rt.keyedWindow, + EOF: true, + } + + return response +} + +// uniqueKey returns the unique key for the session reduce task to be used in the task manager to identify the task. +func (rt *sessionReduceTask) uniqueKey() string { + return fmt.Sprintf("%d:%d:%s", + rt.keyedWindow.GetStart().AsTime().UnixMilli(), + rt.keyedWindow.GetEnd().AsTime().UnixMilli(), + strings.Join(rt.keyedWindow.GetKeys(), delimiter)) +} + +// sessionReduceTaskManager manages the tasks for a session reduce operation. +type sessionReduceTaskManager struct { + creatorHandle SessionReducerCreator + tasks map[string]*sessionReduceTask + responseCh chan *v1.SessionReduceResponse + rw sync.RWMutex +} + +func newReduceTaskManager(sessionReducerFactory SessionReducerCreator) *sessionReduceTaskManager { + return &sessionReduceTaskManager{ + creatorHandle: sessionReducerFactory, + tasks: make(map[string]*sessionReduceTask), + responseCh: make(chan *v1.SessionReduceResponse), + } +} + +// CreateTask creates a new task and starts the session reduce operation. 
+func (rtm *sessionReduceTaskManager) CreateTask(ctx context.Context, request *v1.SessionReduceRequest) error { + rtm.rw.Lock() + + // for create operation, there should be exactly one keyedWindow + if len(request.Operation.KeyedWindows) != 1 { + return fmt.Errorf("create operation error: invalid number of windows in the request - %d", len(request.Operation.KeyedWindows)) + } + + task := &sessionReduceTask{ + keyedWindow: request.Operation.KeyedWindows[0], + sessionReducer: rtm.creatorHandle.Create(), + inputCh: make(chan Datum), + outputCh: make(chan Message), + doneCh: make(chan struct{}), + merged: atomic.NewBool(false), + } + + // add the task to the tasks list + rtm.tasks[task.uniqueKey()] = task + + rtm.rw.Unlock() + + go func() { + var wg sync.WaitGroup + wg.Add(1) + go func() { + defer wg.Done() + for message := range task.outputCh { + if !task.merged.Load() { + // write the output to the output channel, service will forward it to downstream + // if the task is merged to another task, we don't need to send the response + rtm.responseCh <- task.buildSessionReduceResponse(message) + } + } + if !task.merged.Load() { + // send EOF + rtm.responseCh <- task.buildEOFResponse() + } + }() + + task.sessionReducer.SessionReduce(ctx, task.keyedWindow.GetKeys(), task.inputCh, task.outputCh) + // close the output channel and wait for the response to be forwarded + close(task.outputCh) + wg.Wait() + // send a done signal + close(task.doneCh) + // delete the task from the tasks list + rtm.rw.Lock() + delete(rtm.tasks, task.uniqueKey()) + rtm.rw.Unlock() + }() + + // send the datum to the task if the payload is not nil + if request.Payload != nil { + task.inputCh <- buildDatum(request.Payload) + } + + return nil +} + +// AppendToTask writes the message to the reduce task. +// If the task is not found, it will create a new task and starts the session reduce operation. 
+func (rtm *sessionReduceTaskManager) AppendToTask(ctx context.Context, request *v1.SessionReduceRequest) error { + + // for append operation, there should be exactly one keyedWindow + if len(request.Operation.KeyedWindows) != 1 { + return fmt.Errorf("append operation error: invalid number of windows in the request - %d", len(request.Operation.KeyedWindows)) + } + + rtm.rw.RLock() + task, ok := rtm.tasks[generateKey(request.Operation.KeyedWindows[0])] + rtm.rw.RUnlock() + + // if the task is not found, create a new task and start the session reduce operation + if !ok { + return rtm.CreateTask(ctx, request) + } + + // send the datum to the task if the payload is not nil + if request.Payload != nil { + task.inputCh <- buildDatum(request.Payload) + } + return nil +} + +// CloseTask closes the input channel of the session reduce tasks. +func (rtm *sessionReduceTaskManager) CloseTask(request *v1.SessionReduceRequest) { + rtm.rw.RLock() + tasksToBeClosed := make([]*sessionReduceTask, 0, len(request.Operation.KeyedWindows)) + for _, window := range request.Operation.KeyedWindows { + key := generateKey(window) + task, ok := rtm.tasks[key] + if ok { + tasksToBeClosed = append(tasksToBeClosed, task) + } + } + rtm.rw.RUnlock() + + for _, task := range tasksToBeClosed { + close(task.inputCh) + } +} + +// MergeTasks merges the session reduce tasks. It will create a new task with the merged window and +// merges the accumulators from the other tasks to the merged task. 
+func (rtm *sessionReduceTaskManager) MergeTasks(ctx context.Context, request *v1.SessionReduceRequest) error { + rtm.rw.Lock() + mergedWindow := request.Operation.KeyedWindows[0] + + tasks := make([]*sessionReduceTask, 0, len(request.Operation.KeyedWindows)) + + // merge the aggregators from the other tasks + for _, window := range request.Operation.KeyedWindows { + key := generateKey(window) + task, ok := rtm.tasks[key] + if !ok { + rtm.rw.Unlock() + return fmt.Errorf("merge operation error: task not found for %s", key) + } + task.merged.Store(true) + tasks = append(tasks, task) + + // mergedWindow will be the largest window which contains all the windows + if window.GetStart().AsTime().Before(mergedWindow.GetStart().AsTime()) { + mergedWindow.Start = window.Start + } + + if window.GetEnd().AsTime().After(mergedWindow.GetEnd().AsTime()) { + mergedWindow.End = window.End + } + } + + rtm.rw.Unlock() + + accumulators := make([][]byte, 0, len(tasks)) + // close all the tasks and collect the accumulators + for _, task := range tasks { + close(task.inputCh) + // wait for the task to complete + <-task.doneCh + accumulators = append(accumulators, task.sessionReducer.Accumulator(ctx)) + } + + // create a new task with the merged keyedWindow + err := rtm.CreateTask(ctx, &v1.SessionReduceRequest{ + Payload: nil, + Operation: &v1.SessionReduceRequest_WindowOperation{ + Event: v1.SessionReduceRequest_WindowOperation_OPEN, + KeyedWindows: []*v1.KeyedWindow{mergedWindow}, + }, + }) + if err != nil { + return err + } + + rtm.rw.RLock() + mergedTask, ok := rtm.tasks[generateKey(mergedWindow)] + rtm.rw.RUnlock() + if !ok { + return fmt.Errorf("merge operation error: merged task not found for key %s", mergedWindow.String()) + } + // merge the accumulators using the merged task + for _, aggregator := range accumulators { + mergedTask.sessionReducer.MergeAccumulator(ctx, aggregator) + } + + return nil +} + +// ExpandTask expands session reduce task. 
It will update the keyedWindow of the task +// expects request.Operation.KeyedWindows to have exactly two windows. The first is the old window and the second +// is the new expanded window. +func (rtm *sessionReduceTaskManager) ExpandTask(request *v1.SessionReduceRequest) error { + // for expand operation, there should be exactly two windows + if len(request.Operation.KeyedWindows) != 2 { + return fmt.Errorf("expand operation error: expected exactly two windows") + } + + rtm.rw.Lock() + key := generateKey(request.Operation.KeyedWindows[0]) + task, ok := rtm.tasks[key] + if !ok { + rtm.rw.Unlock() + return fmt.Errorf("expand operation error: task not found for key - %s", key) + } + + // assign the new keyedWindow to the task + task.keyedWindow = request.Operation.KeyedWindows[1] + + // delete the old entry from the tasks map and add the new entry + delete(rtm.tasks, key) + rtm.tasks[task.uniqueKey()] = task + rtm.rw.Unlock() + + // send the datum to the task if the payload is not nil + if request.Payload != nil { + task.inputCh <- buildDatum(request.GetPayload()) + } + + return nil +} + +// OutputChannel returns the output channel of the task manager to read the results. +func (rtm *sessionReduceTaskManager) OutputChannel() <-chan *v1.SessionReduceResponse { + return rtm.responseCh +} + +// WaitAll waits for all the pending reduce tasks to complete. 
+func (rtm *sessionReduceTaskManager) WaitAll() { + rtm.rw.RLock() + tasks := make([]*sessionReduceTask, 0, len(rtm.tasks)) + for _, task := range rtm.tasks { + tasks = append(tasks, task) + } + rtm.rw.RUnlock() + + for _, task := range tasks { + <-task.doneCh + } + // after all the tasks are completed, close the output channel + close(rtm.responseCh) +} + +func generateKey(keyedWindow *v1.KeyedWindow) string { + return fmt.Sprintf("%d:%d:%s", + keyedWindow.GetStart().AsTime().UnixMilli(), + keyedWindow.GetEnd().AsTime().UnixMilli(), + strings.Join(keyedWindow.GetKeys(), delimiter)) +} + +func buildDatum(payload *v1.SessionReduceRequest_Payload) Datum { + return NewHandlerDatum(payload.GetValue(), payload.EventTime.AsTime(), payload.Watermark.AsTime()) +} diff --git a/pkg/sessionreducer/types.go b/pkg/sessionreducer/types.go new file mode 100644 index 00000000..ee5c4966 --- /dev/null +++ b/pkg/sessionreducer/types.go @@ -0,0 +1,30 @@ +package sessionreducer + +import "time" + +// handlerDatum implements the Datum interface and is used in the SessionReduce functions. 
+type handlerDatum struct { + value []byte + eventTime time.Time + watermark time.Time +} + +func NewHandlerDatum(value []byte, eventTime time.Time, watermark time.Time) Datum { + return &handlerDatum{ + value: value, + eventTime: eventTime, + watermark: watermark, + } +} + +func (h *handlerDatum) Value() []byte { + return h.value +} + +func (h *handlerDatum) EventTime() time.Time { + return h.eventTime +} + +func (h *handlerDatum) Watermark() time.Time { + return h.watermark +} diff --git a/pkg/sideinput/examples/simple-sideinput/Makefile b/pkg/sideinput/examples/simple-sideinput/Makefile index 04299670..97db7739 100644 --- a/pkg/sideinput/examples/simple-sideinput/Makefile +++ b/pkg/sideinput/examples/simple-sideinput/Makefile @@ -4,7 +4,7 @@ build: .PHONY: image image: build - docker buildx build -t "quay.io/numaio/numaflow-go/sideinput-example:v0.5.0" --platform linux/amd64,linux/arm64 --target sideinput . --push + docker buildx build -t "quay.io/numaio/numaflow-go/sideinput-example:v0.6.0" --platform linux/amd64,linux/arm64 --target sideinput . 
--push clean: -rm -rf ./dist diff --git a/pkg/sideinput/examples/simple-sideinput/go.mod b/pkg/sideinput/examples/simple-sideinput/go.mod index 251d7b61..64056201 100644 --- a/pkg/sideinput/examples/simple-sideinput/go.mod +++ b/pkg/sideinput/examples/simple-sideinput/go.mod @@ -1,11 +1,8 @@ -module even_odd +module sideinput go 1.20 -require ( - github.com/fsnotify/fsnotify v1.6.0 - github.com/numaproj/numaflow-go v0.4.6-0.20230828035951-6f79b632ecfe -) +require github.com/numaproj/numaflow-go v0.6.0 require ( github.com/golang/protobuf v1.5.3 // indirect diff --git a/pkg/sideinput/examples/simple-sideinput/go.sum b/pkg/sideinput/examples/simple-sideinput/go.sum index 85fdf09f..8b1ed758 100644 --- a/pkg/sideinput/examples/simple-sideinput/go.sum +++ b/pkg/sideinput/examples/simple-sideinput/go.sum @@ -1,18 +1,15 @@ github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= -github.com/fsnotify/fsnotify v1.6.0 h1:n+5WquG0fcWoWp6xPWfHdbskMCQaFnG6PfBrh1Ky4HY= -github.com/fsnotify/fsnotify v1.6.0/go.mod h1:sl3t1tCWJFWoRz9R8WJCbQihKKwmorjAbSClcnxKAGw= github.com/golang/protobuf v1.5.0/go.mod h1:FsONVRAS9T7sI+LIUmWTfcYkHO4aIWwzhcaSAoJOfIk= github.com/golang/protobuf v1.5.3 h1:KhyjKVUg7Usr/dYsdSqoFveMYd5ko72D+zANwlG1mmg= github.com/golang/protobuf v1.5.3/go.mod h1:XVQd3VNwM+JqD3oG2Ue2ip4fOMUkwXdXDdiuN0vRsmY= github.com/google/go-cmp v0.5.5/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= github.com/google/go-cmp v0.5.9 h1:O2Tfq5qg4qc4AmwVlvv0oLiVAGB7enBSJ2x2DqQFi38= -github.com/numaproj/numaflow-go v0.4.6-0.20230828035951-6f79b632ecfe h1:nK/BGffgwQ4L9pyllwzSZttPxMf+OOqK3DOP97KZdRk= -github.com/numaproj/numaflow-go v0.4.6-0.20230828035951-6f79b632ecfe/go.mod h1:zcJq1YAA/jnxCQLW7EFK4+HXWCd2QtW4tMOvRjHFa2g= +github.com/numaproj/numaflow-go v0.6.0 h1:gqTX1u1pFJJhX/3l3zYM8aLqRSHEainYrgBIollL0js= +github.com/numaproj/numaflow-go v0.6.0/go.mod h1:5zwvvREIbqaCPCKsNE1MVjVToD0kvkCh2Z90Izlhw5U= github.com/pmezard/go-difflib v1.0.0 
h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= github.com/stretchr/testify v1.8.1 h1:w7B6lhMri9wdJUVmEZPGGhZzrYTPvgJArz7wNPgYKsk= golang.org/x/net v0.9.0 h1:aWJ/m6xSmxWBx+V0XRHTlrYrPG56jKsLdTFmsSsCzOM= golang.org/x/net v0.9.0/go.mod h1:d48xBJpPfHeWQsugry2m+kC02ZBRGRgulfHnEXEuWns= -golang.org/x/sys v0.0.0-20220908164124-27713097b956/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.7.0 h1:3jlCCIQZPdOYu1h8BkNvLz8Kgwtae2cagcG/VamtZRU= golang.org/x/sys v0.7.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/text v0.9.0 h1:2sjJmO8cDvYveuX97RDLsxlyUxLl+GHoLxBiRdHllBE= diff --git a/pkg/sideinput/examples/simple-sideinput/udf/Makefile b/pkg/sideinput/examples/simple-sideinput/udf/Makefile index b62d4ccf..688d5f51 100644 --- a/pkg/sideinput/examples/simple-sideinput/udf/Makefile +++ b/pkg/sideinput/examples/simple-sideinput/udf/Makefile @@ -5,7 +5,7 @@ build: .PHONY: image #To build an image that supports multiple platforms(linux/amd64,linux/arm64) and push to quay.io, use the following command image: build - docker buildx build -t "quay.io/numaio/numaflow-go/udf-sideinput-example:v0.5.0" --platform linux/amd64,linux/arm64 --target udf-sideinput . --push + docker buildx build -t "quay.io/numaio/numaflow-go/udf-sideinput-example:v0.6.0" --platform linux/amd64,linux/arm64 --target udf-sideinput . 
--push diff --git a/pkg/sideinput/examples/simple-sideinput/udf/go.mod b/pkg/sideinput/examples/simple-sideinput/udf/go.mod index 251d7b61..261a985d 100644 --- a/pkg/sideinput/examples/simple-sideinput/udf/go.mod +++ b/pkg/sideinput/examples/simple-sideinput/udf/go.mod @@ -4,7 +4,7 @@ go 1.20 require ( github.com/fsnotify/fsnotify v1.6.0 - github.com/numaproj/numaflow-go v0.4.6-0.20230828035951-6f79b632ecfe + github.com/numaproj/numaflow-go v0.6.0 ) require ( diff --git a/pkg/sideinput/examples/simple-sideinput/udf/go.sum b/pkg/sideinput/examples/simple-sideinput/udf/go.sum index 85fdf09f..66525120 100644 --- a/pkg/sideinput/examples/simple-sideinput/udf/go.sum +++ b/pkg/sideinput/examples/simple-sideinput/udf/go.sum @@ -6,8 +6,8 @@ github.com/golang/protobuf v1.5.3 h1:KhyjKVUg7Usr/dYsdSqoFveMYd5ko72D+zANwlG1mmg github.com/golang/protobuf v1.5.3/go.mod h1:XVQd3VNwM+JqD3oG2Ue2ip4fOMUkwXdXDdiuN0vRsmY= github.com/google/go-cmp v0.5.5/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= github.com/google/go-cmp v0.5.9 h1:O2Tfq5qg4qc4AmwVlvv0oLiVAGB7enBSJ2x2DqQFi38= -github.com/numaproj/numaflow-go v0.4.6-0.20230828035951-6f79b632ecfe h1:nK/BGffgwQ4L9pyllwzSZttPxMf+OOqK3DOP97KZdRk= -github.com/numaproj/numaflow-go v0.4.6-0.20230828035951-6f79b632ecfe/go.mod h1:zcJq1YAA/jnxCQLW7EFK4+HXWCd2QtW4tMOvRjHFa2g= +github.com/numaproj/numaflow-go v0.6.0 h1:gqTX1u1pFJJhX/3l3zYM8aLqRSHEainYrgBIollL0js= +github.com/numaproj/numaflow-go v0.6.0/go.mod h1:5zwvvREIbqaCPCKsNE1MVjVToD0kvkCh2Z90Izlhw5U= github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= github.com/stretchr/testify v1.8.1 h1:w7B6lhMri9wdJUVmEZPGGhZzrYTPvgJArz7wNPgYKsk= golang.org/x/net v0.9.0 h1:aWJ/m6xSmxWBx+V0XRHTlrYrPG56jKsLdTFmsSsCzOM= diff --git a/pkg/sideinput/options.go b/pkg/sideinput/options.go index 6703a90f..761ef737 100644 --- a/pkg/sideinput/options.go +++ b/pkg/sideinput/options.go @@ -9,8 +9,8 @@ type options struct { // Option is the interface to apply options. 
type Option func(*options) -// DefaultOptions returns the default options. -func DefaultOptions() *options { +// defaultOptions returns the default options. +func defaultOptions() *options { return &options{ sockAddr: address, maxMessageSize: defaultMaxMessageSize, diff --git a/pkg/sideinput/server.go b/pkg/sideinput/server.go index a19a59c9..37cf7d02 100644 --- a/pkg/sideinput/server.go +++ b/pkg/sideinput/server.go @@ -20,7 +20,7 @@ type server struct { // NewSideInputServer creates a new server object. func NewSideInputServer(r SideInputRetriever, inputOptions ...Option) numaflow.Server { - opts := DefaultOptions() + opts := defaultOptions() for _, inputOption := range inputOptions { inputOption(opts) } diff --git a/pkg/sinker/examples/log/Makefile b/pkg/sinker/examples/log/Makefile index 40af4fff..cada26b3 100644 --- a/pkg/sinker/examples/log/Makefile +++ b/pkg/sinker/examples/log/Makefile @@ -4,7 +4,7 @@ build: .PHONY: image image: build - docker build -t "quay.io/numaio/numaflow-go/sink-log:v0.5.0" --target log . + docker build -t "quay.io/numaio/numaflow-go/sink-log:v0.6.0" --target log . 
clean: -rm -rf ./dist diff --git a/pkg/sinker/examples/log/go.mod b/pkg/sinker/examples/log/go.mod index 54f770a7..f48ef452 100644 --- a/pkg/sinker/examples/log/go.mod +++ b/pkg/sinker/examples/log/go.mod @@ -1,8 +1,8 @@ -module even_odd +module log_sink go 1.20 -require github.com/numaproj/numaflow-go v0.4.6-0.20230828035951-6f79b632ecfe +require github.com/numaproj/numaflow-go v0.6.0 require ( github.com/golang/protobuf v1.5.3 // indirect diff --git a/pkg/sinker/examples/log/go.sum b/pkg/sinker/examples/log/go.sum index 80fd3796..8b1ed758 100644 --- a/pkg/sinker/examples/log/go.sum +++ b/pkg/sinker/examples/log/go.sum @@ -4,8 +4,8 @@ github.com/golang/protobuf v1.5.3 h1:KhyjKVUg7Usr/dYsdSqoFveMYd5ko72D+zANwlG1mmg github.com/golang/protobuf v1.5.3/go.mod h1:XVQd3VNwM+JqD3oG2Ue2ip4fOMUkwXdXDdiuN0vRsmY= github.com/google/go-cmp v0.5.5/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= github.com/google/go-cmp v0.5.9 h1:O2Tfq5qg4qc4AmwVlvv0oLiVAGB7enBSJ2x2DqQFi38= -github.com/numaproj/numaflow-go v0.4.6-0.20230828035951-6f79b632ecfe h1:nK/BGffgwQ4L9pyllwzSZttPxMf+OOqK3DOP97KZdRk= -github.com/numaproj/numaflow-go v0.4.6-0.20230828035951-6f79b632ecfe/go.mod h1:zcJq1YAA/jnxCQLW7EFK4+HXWCd2QtW4tMOvRjHFa2g= +github.com/numaproj/numaflow-go v0.6.0 h1:gqTX1u1pFJJhX/3l3zYM8aLqRSHEainYrgBIollL0js= +github.com/numaproj/numaflow-go v0.6.0/go.mod h1:5zwvvREIbqaCPCKsNE1MVjVToD0kvkCh2Z90Izlhw5U= github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= github.com/stretchr/testify v1.8.1 h1:w7B6lhMri9wdJUVmEZPGGhZzrYTPvgJArz7wNPgYKsk= golang.org/x/net v0.9.0 h1:aWJ/m6xSmxWBx+V0XRHTlrYrPG56jKsLdTFmsSsCzOM= diff --git a/pkg/sinker/options.go b/pkg/sinker/options.go index 2001e930..b9974034 100644 --- a/pkg/sinker/options.go +++ b/pkg/sinker/options.go @@ -13,7 +13,7 @@ type options struct { // Option is the interface to apply options. 
type Option func(*options) -func DefaultOptions() *options { +func defaultOptions() *options { return &options{ sockAddr: address, maxMessageSize: defaultMaxMessageSize, diff --git a/pkg/sinker/server.go b/pkg/sinker/server.go index adcc7ce3..4833961a 100644 --- a/pkg/sinker/server.go +++ b/pkg/sinker/server.go @@ -20,7 +20,7 @@ type sinkServer struct { // NewServer creates a new sinkServer object. func NewServer(h Sinker, inputOptions ...Option) numaflow.Server { - opts := DefaultOptions() + opts := defaultOptions() for _, inputOption := range inputOptions { inputOption(opts) } diff --git a/pkg/sourcer/examples/simple_source/Makefile b/pkg/sourcer/examples/simple_source/Makefile index cbe79fd4..6712c826 100644 --- a/pkg/sourcer/examples/simple_source/Makefile +++ b/pkg/sourcer/examples/simple_source/Makefile @@ -4,7 +4,7 @@ build: .PHONY: image image: build - docker build -t "quay.io/numaio/numaflow-go/source-simple-source:v0.5.1" --target simple-source . + docker build -t "quay.io/numaio/numaflow-go/source-simple-source:v0.6.0" --target simple-source . 
clean: -rm -rf ./dist diff --git a/pkg/sourcer/examples/simple_source/go.mod b/pkg/sourcer/examples/simple_source/go.mod index a0550a49..9d983f65 100644 --- a/pkg/sourcer/examples/simple_source/go.mod +++ b/pkg/sourcer/examples/simple_source/go.mod @@ -1,9 +1,9 @@ -module github.com/numaproj/numaflow-go/pkg/sourcer/examples/simple_source +module simple_source go 1.20 require ( - github.com/numaproj/numaflow-go v0.5.1 + github.com/numaproj/numaflow-go v0.6.0 github.com/stretchr/testify v1.8.1 ) @@ -12,7 +12,7 @@ require ( github.com/golang/protobuf v1.5.3 // indirect github.com/kr/text v0.2.0 // indirect github.com/pmezard/go-difflib v1.0.0 // indirect - github.com/rogpeppe/go-internal v1.11.0 // indirect + github.com/rogpeppe/go-internal v1.12.0 // indirect golang.org/x/net v0.9.0 // indirect golang.org/x/sys v0.7.0 // indirect golang.org/x/text v0.9.0 // indirect diff --git a/pkg/sourcer/examples/simple_source/go.sum b/pkg/sourcer/examples/simple_source/go.sum index 79cbb35d..191614d2 100644 --- a/pkg/sourcer/examples/simple_source/go.sum +++ b/pkg/sourcer/examples/simple_source/go.sum @@ -10,12 +10,12 @@ github.com/google/go-cmp v0.5.9 h1:O2Tfq5qg4qc4AmwVlvv0oLiVAGB7enBSJ2x2DqQFi38= github.com/kr/pretty v0.3.1 h1:flRD4NNwYAUpkphVc1HcthR4KEIFJ65n8Mw5qdRn3LE= github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY= github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE= -github.com/numaproj/numaflow-go v0.5.1 h1:mvala+EmlrRtI20cr1y928zR7dO/HKUJsLai7vISHEA= -github.com/numaproj/numaflow-go v0.5.1/go.mod h1:5zwvvREIbqaCPCKsNE1MVjVToD0kvkCh2Z90Izlhw5U= +github.com/numaproj/numaflow-go v0.6.0 h1:gqTX1u1pFJJhX/3l3zYM8aLqRSHEainYrgBIollL0js= +github.com/numaproj/numaflow-go v0.6.0/go.mod h1:5zwvvREIbqaCPCKsNE1MVjVToD0kvkCh2Z90Izlhw5U= github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= 
-github.com/rogpeppe/go-internal v1.11.0 h1:cWPaGQEPrBb5/AsnsZesgZZ9yb1OQ+GOISoDNXVBh4M= -github.com/rogpeppe/go-internal v1.11.0/go.mod h1:ddIwULY96R17DhadqLgMfk9H9tvdUzkipdSkR5nkCZA= +github.com/rogpeppe/go-internal v1.12.0 h1:exVL4IDcn6na9z1rAb56Vxr+CgyK3nn3O+epU5NdKM8= +github.com/rogpeppe/go-internal v1.12.0/go.mod h1:E+RYuTGaKKdloAfM02xzb0FW3Paa99yedzYV+kq4uf4= github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= github.com/stretchr/objx v0.4.0/go.mod h1:YvHI0jy2hoMjB+UWwv71VJQ9isScKT/TqJzVSSt89Yw= github.com/stretchr/objx v0.5.0/go.mod h1:Yh+to48EsGEfYuaHDzXPcE3xhTkx73EhmCGUpEOglKo= diff --git a/pkg/sourcer/examples/simple_source/impl/simple_source.go b/pkg/sourcer/examples/simple_source/impl/simple_source.go index a7ce0c75..bd8c783c 100644 --- a/pkg/sourcer/examples/simple_source/impl/simple_source.go +++ b/pkg/sourcer/examples/simple_source/impl/simple_source.go @@ -56,7 +56,7 @@ func (s *SimpleSource) Read(_ context.Context, readRequest sourcesdk.ReadRequest offsetValue := serializeOffset(s.readIdx) messageCh <- sourcesdk.NewMessage( []byte(strconv.FormatInt(s.readIdx, 10)), - sourcesdk.NewOffset(offsetValue, "0"), + sourcesdk.NewOffsetWithDefaultPartitionId(offsetValue), time.Now()) // Mark the offset as to be acked, and increment the read index. 
s.toAckSet[s.readIdx] = struct{}{} @@ -72,6 +72,10 @@ func (s *SimpleSource) Ack(_ context.Context, request sourcesdk.AckRequest) { } } +func (s *SimpleSource) Partitions(_ context.Context) []int32 { + return sourcesdk.DefaultPartitions() +} + func serializeOffset(idx int64) []byte { return []byte(strconv.FormatInt(idx, 10)) } diff --git a/pkg/sourcer/examples/simple_source/main.go b/pkg/sourcer/examples/simple_source/main.go index 77bcf20e..f00b0f2a 100644 --- a/pkg/sourcer/examples/simple_source/main.go +++ b/pkg/sourcer/examples/simple_source/main.go @@ -4,9 +4,9 @@ import ( "context" "log" - "github.com/numaproj/numaflow-go/pkg/sourcer" + "simple_source/impl" - "github.com/numaproj/numaflow-go/pkg/sourcer/examples/simple_source/impl" + "github.com/numaproj/numaflow-go/pkg/sourcer" ) func main() { diff --git a/pkg/sourcer/interface.go b/pkg/sourcer/interface.go index 9e7a8540..01f78515 100644 --- a/pkg/sourcer/interface.go +++ b/pkg/sourcer/interface.go @@ -18,6 +18,12 @@ type Sourcer interface { // When the return value is negative, it indicates the pending information is not available. // With pending information being not available, the Numaflow platform doesn't auto-scale the source. Pending(ctx context.Context) int64 + // Partitions returns the partitions associated with the source, will be used by the platform to determine + // the partitions to which the watermark should be published. If the source doesn't have partitions, + // DefaultPartitions() can be used to return the default partitions. + // In most cases, the DefaultPartitions() should be enough; the cases where we need to implement custom Partitions() + // is in a case like Kafka, where a reader can read from multiple Kafka partitions. + Partitions(ctx context.Context) []int32 } // ReadRequest is the interface of read request. 
diff --git a/pkg/sourcer/message.go b/pkg/sourcer/message.go index 82bf955e..7bdb26a7 100644 --- a/pkg/sourcer/message.go +++ b/pkg/sourcer/message.go @@ -1,6 +1,13 @@ package sourcer -import "time" +import ( + "os" + "strconv" + "time" +) + +// create default partition id from the environment variable "NUMAFLOW_REPLICA" +var defaultPartitionId, _ = strconv.Atoi(os.Getenv("NUMAFLOW_REPLICA")) // Message is used to wrap the data return by UDSource type Message struct { @@ -43,20 +50,36 @@ func (m Message) EventTime() time.Time { type Offset struct { value []byte - partitionId string + partitionId int32 } // NewOffset creates an Offset with value and partition id -func NewOffset(value []byte, partitionId string) Offset { +func NewOffset(value []byte, partitionId int32) Offset { return Offset{value: value, partitionId: partitionId} } +// NewOffsetWithDefaultPartitionId creates an Offset with value and default partition id. This +// function can be used if you use DefaultPartitions() to implement the Sourcer interface. +// For most cases, this function can be used as long as the source does not have a concept of partitions. +// If you need to implement a custom partition, use `NewOffset`. +func NewOffsetWithDefaultPartitionId(value []byte) Offset { + return Offset{value: value, partitionId: DefaultPartitions()[0]} +} + +// DefaultPartitions returns default partitions for the source. +// It can be used in the Partitions() function of the Sourcer interface only +// if the source doesn't have partitions. DefaultPartition will be the pod replica +// index of the source. 
+func DefaultPartitions() []int32 { + return []int32{int32(defaultPartitionId)} +} + // Value returns value of the offset func (o Offset) Value() []byte { return o.value } // PartitionId returns partition id of the offset -func (o Offset) PartitionId() string { +func (o Offset) PartitionId() int32 { return o.partitionId } diff --git a/pkg/sourcer/options.go b/pkg/sourcer/options.go index 43c66af7..a665ee25 100644 --- a/pkg/sourcer/options.go +++ b/pkg/sourcer/options.go @@ -13,7 +13,7 @@ type options struct { // Option is the interface to apply options. type Option func(*options) -func DefaultOptions() *options { +func defaultOptions() *options { return &options{ sockAddr: address, maxMessageSize: defaultMaxMessageSize, diff --git a/pkg/sourcer/server.go b/pkg/sourcer/server.go index b52f5310..58bd01b8 100644 --- a/pkg/sourcer/server.go +++ b/pkg/sourcer/server.go @@ -21,7 +21,7 @@ type server struct { func NewServer( source Sourcer, inputOptions ...Option) numaflow.Server { - var opts = DefaultOptions() + var opts = defaultOptions() for _, inputOption := range inputOptions { inputOption(opts) diff --git a/pkg/sourcer/server_test.go b/pkg/sourcer/server_test.go index 42067ea6..6ace5d5f 100644 --- a/pkg/sourcer/server_test.go +++ b/pkg/sourcer/server_test.go @@ -23,6 +23,10 @@ func (ts TestNoopSource) Pending(ctx context.Context) int64 { return 0 } +func (ts TestNoopSource) Partitions(ctx context.Context) []int32 { + return []int32{0} +} + func TestServer_Start(t *testing.T) { socketFile, _ := os.CreateTemp("/tmp", "numaflow-test.sock") defer func() { diff --git a/pkg/sourcer/service.go b/pkg/sourcer/service.go index 848a5b69..962691b1 100644 --- a/pkg/sourcer/service.go +++ b/pkg/sourcer/service.go @@ -111,3 +111,12 @@ func (fs *Service) AckFn(ctx context.Context, d *sourcepb.AckRequest) (*sourcepb Result: &sourcepb.AckResponse_Result{}, }, nil } + +func (fs *Service) PartitionsFn(ctx context.Context, _ *emptypb.Empty) (*sourcepb.PartitionsResponse, error) { + 
partitions := fs.Source.Partitions(ctx) + return &sourcepb.PartitionsResponse{ + Result: &sourcepb.PartitionsResponse_Result{ + Partitions: partitions, + }, + }, nil +} diff --git a/pkg/sourcer/service_test.go b/pkg/sourcer/service_test.go index 29f6bb2c..1f340777 100644 --- a/pkg/sourcer/service_test.go +++ b/pkg/sourcer/service_test.go @@ -16,15 +16,16 @@ import ( sourcepb "github.com/numaproj/numaflow-go/pkg/apis/proto/source/v1" ) -var TestEventTime = time.Date(2021, 1, 1, 0, 0, 0, 0, time.UTC) -var TestKey = "test-key" -var TestPendingNumber int64 = 123 +var testEventTime = time.Date(2021, 1, 1, 0, 0, 0, 0, time.UTC) +var testKey = "test-key" +var testPendingNumber int64 = 123 +var testPartitions = []int32{1, 3, 5} type TestSource struct{} func (ts TestSource) Read(_ context.Context, _ ReadRequest, messageCh chan<- Message) { - msg := NewMessage([]byte(`test`), Offset{}, TestEventTime) - messageCh <- msg.WithKeys([]string{TestKey}) + msg := NewMessage([]byte(`test`), Offset{}, testEventTime) + messageCh <- msg.WithKeys([]string{testKey}) } func (ts TestSource) Ack(_ context.Context, _ AckRequest) { @@ -33,7 +34,11 @@ func (ts TestSource) Ack(_ context.Context, _ AckRequest) { } func (ts TestSource) Pending(_ context.Context) int64 { - return TestPendingNumber + return testPendingNumber +} + +func (ts TestSource) Partitions(_ context.Context) []int32 { + return testPartitions } func TestService_IsReady(t *testing.T) { @@ -113,8 +118,8 @@ func TestService_ReadFn(t *testing.T) { Result: &sourcepb.ReadResponse_Result{ Payload: []byte(`test`), Offset: &sourcepb.Offset{}, - EventTime: timestamppb.New(TestEventTime), - Keys: []string{TestKey}, + EventTime: timestamppb.New(testEventTime), + Keys: []string{testKey}, }, }, }, @@ -133,8 +138,8 @@ func TestService_ReadFn(t *testing.T) { Result: &sourcepb.ReadResponse_Result{ Payload: []byte(`test`), Offset: &sourcepb.Offset{}, - EventTime: timestamppb.New(TestEventTime), - Keys: []string{TestKey}, + EventTime: 
timestamppb.New(testEventTime), + Keys: []string{testKey}, }, }, }, @@ -193,7 +198,7 @@ func TestService_AckFn(t *testing.T) { Request: &sourcepb.AckRequest_Request{ Offsets: []*sourcepb.Offset{ { - PartitionId: "0", + PartitionId: 0, Offset: []byte("test"), }, }, @@ -211,7 +216,19 @@ func TestService_PendingFn(t *testing.T) { got, err := fs.PendingFn(ctx, &emptypb.Empty{}) assert.Equal(t, got, &sourcepb.PendingResponse{ Result: &sourcepb.PendingResponse_Result{ - Count: TestPendingNumber, + Count: testPendingNumber, + }, + }) + assert.NoError(t, err) +} + +func TestService_PartitionsFn(t *testing.T) { + fs := &Service{Source: TestSource{}} + ctx := context.Background() + got, err := fs.PartitionsFn(ctx, &emptypb.Empty{}) + assert.EqualValues(t, got, &sourcepb.PartitionsResponse{ + Result: &sourcepb.PartitionsResponse_Result{ + Partitions: testPartitions, }, }) assert.NoError(t, err) diff --git a/pkg/sourcetransformer/examples/assign_event_time/Makefile b/pkg/sourcetransformer/examples/assign_event_time/Makefile index 82f0c095..5b6c5a68 100644 --- a/pkg/sourcetransformer/examples/assign_event_time/Makefile +++ b/pkg/sourcetransformer/examples/assign_event_time/Makefile @@ -4,7 +4,7 @@ build: .PHONY: image image: build - docker build -t "quay.io/numaio/numaflow-go/mapt-assign-event-time:v0.5.0" --target assign-event-time . + docker build -t "quay.io/numaio/numaflow-go/mapt-assign-event-time:v0.6.0" --target assign-event-time . 
clean: -rm -rf ./dist diff --git a/pkg/sourcetransformer/examples/assign_event_time/go.mod b/pkg/sourcetransformer/examples/assign_event_time/go.mod index 54f770a7..21f42aa5 100644 --- a/pkg/sourcetransformer/examples/assign_event_time/go.mod +++ b/pkg/sourcetransformer/examples/assign_event_time/go.mod @@ -1,8 +1,8 @@ -module even_odd +module assign_event_time go 1.20 -require github.com/numaproj/numaflow-go v0.4.6-0.20230828035951-6f79b632ecfe +require github.com/numaproj/numaflow-go v0.6.0 require ( github.com/golang/protobuf v1.5.3 // indirect diff --git a/pkg/sourcetransformer/examples/assign_event_time/go.sum b/pkg/sourcetransformer/examples/assign_event_time/go.sum index 80fd3796..8b1ed758 100644 --- a/pkg/sourcetransformer/examples/assign_event_time/go.sum +++ b/pkg/sourcetransformer/examples/assign_event_time/go.sum @@ -4,8 +4,8 @@ github.com/golang/protobuf v1.5.3 h1:KhyjKVUg7Usr/dYsdSqoFveMYd5ko72D+zANwlG1mmg github.com/golang/protobuf v1.5.3/go.mod h1:XVQd3VNwM+JqD3oG2Ue2ip4fOMUkwXdXDdiuN0vRsmY= github.com/google/go-cmp v0.5.5/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= github.com/google/go-cmp v0.5.9 h1:O2Tfq5qg4qc4AmwVlvv0oLiVAGB7enBSJ2x2DqQFi38= -github.com/numaproj/numaflow-go v0.4.6-0.20230828035951-6f79b632ecfe h1:nK/BGffgwQ4L9pyllwzSZttPxMf+OOqK3DOP97KZdRk= -github.com/numaproj/numaflow-go v0.4.6-0.20230828035951-6f79b632ecfe/go.mod h1:zcJq1YAA/jnxCQLW7EFK4+HXWCd2QtW4tMOvRjHFa2g= +github.com/numaproj/numaflow-go v0.6.0 h1:gqTX1u1pFJJhX/3l3zYM8aLqRSHEainYrgBIollL0js= +github.com/numaproj/numaflow-go v0.6.0/go.mod h1:5zwvvREIbqaCPCKsNE1MVjVToD0kvkCh2Z90Izlhw5U= github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= github.com/stretchr/testify v1.8.1 h1:w7B6lhMri9wdJUVmEZPGGhZzrYTPvgJArz7wNPgYKsk= golang.org/x/net v0.9.0 h1:aWJ/m6xSmxWBx+V0XRHTlrYrPG56jKsLdTFmsSsCzOM= diff --git a/pkg/sourcetransformer/examples/event_time_filter/Makefile b/pkg/sourcetransformer/examples/event_time_filter/Makefile index 
bd46b298..faf9676d 100644 --- a/pkg/sourcetransformer/examples/event_time_filter/Makefile +++ b/pkg/sourcetransformer/examples/event_time_filter/Makefile @@ -4,7 +4,7 @@ build: .PHONY: image image: build - docker build -t "quay.io/numaio/numaflow-go/mapt-event-time-filter:v0.5.0" --target event-time-filter . + docker build -t "quay.io/numaio/numaflow-go/mapt-event-time-filter:v0.6.0" --target event-time-filter . clean: -rm -rf ./dist diff --git a/pkg/sourcetransformer/examples/event_time_filter/go.mod b/pkg/sourcetransformer/examples/event_time_filter/go.mod index ccac323c..408d5cab 100644 --- a/pkg/sourcetransformer/examples/event_time_filter/go.mod +++ b/pkg/sourcetransformer/examples/event_time_filter/go.mod @@ -1,9 +1,9 @@ -module even_odd +module event_time_filter go 1.20 require ( - github.com/numaproj/numaflow-go v0.4.6-0.20230828035951-6f79b632ecfe + github.com/numaproj/numaflow-go v0.6.0 github.com/stretchr/testify v1.8.1 ) diff --git a/pkg/sourcetransformer/examples/event_time_filter/go.sum b/pkg/sourcetransformer/examples/event_time_filter/go.sum index 170264f6..b1cec696 100644 --- a/pkg/sourcetransformer/examples/event_time_filter/go.sum +++ b/pkg/sourcetransformer/examples/event_time_filter/go.sum @@ -10,8 +10,8 @@ github.com/google/go-cmp v0.5.9 h1:O2Tfq5qg4qc4AmwVlvv0oLiVAGB7enBSJ2x2DqQFi38= github.com/kr/pretty v0.3.1 h1:flRD4NNwYAUpkphVc1HcthR4KEIFJ65n8Mw5qdRn3LE= github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY= github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE= -github.com/numaproj/numaflow-go v0.4.6-0.20230828035951-6f79b632ecfe h1:nK/BGffgwQ4L9pyllwzSZttPxMf+OOqK3DOP97KZdRk= -github.com/numaproj/numaflow-go v0.4.6-0.20230828035951-6f79b632ecfe/go.mod h1:zcJq1YAA/jnxCQLW7EFK4+HXWCd2QtW4tMOvRjHFa2g= +github.com/numaproj/numaflow-go v0.6.0 h1:gqTX1u1pFJJhX/3l3zYM8aLqRSHEainYrgBIollL0js= +github.com/numaproj/numaflow-go v0.6.0/go.mod h1:5zwvvREIbqaCPCKsNE1MVjVToD0kvkCh2Z90Izlhw5U= 
github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= github.com/rogpeppe/go-internal v1.11.0 h1:cWPaGQEPrBb5/AsnsZesgZZ9yb1OQ+GOISoDNXVBh4M= diff --git a/pkg/sourcetransformer/examples/event_time_filter/impl/filter.go b/pkg/sourcetransformer/examples/event_time_filter/impl/filter.go index 5e93c28b..9feb5f29 100644 --- a/pkg/sourcetransformer/examples/event_time_filter/impl/filter.go +++ b/pkg/sourcetransformer/examples/event_time_filter/impl/filter.go @@ -6,11 +6,11 @@ import ( "github.com/numaproj/numaflow-go/pkg/sourcetransformer" ) -func FilterEventTime(keys []string, d sourcetransformer.Datum) sourcetransformer.Messages { +func FilterEventTime(_ []string, d sourcetransformer.Datum) sourcetransformer.Messages { janFirst2022 := time.Date(2022, 1, 1, 0, 0, 0, 0, time.UTC) janFirst2023 := time.Date(2023, 1, 1, 0, 0, 0, 0, time.UTC) if d.EventTime().Before(janFirst2022) { - return sourcetransformer.MessagesBuilder().Append(sourcetransformer.MessageToDrop()) + return sourcetransformer.MessagesBuilder().Append(sourcetransformer.MessageToDrop(d.EventTime())) } else if d.EventTime().Before(janFirst2023) { return sourcetransformer.MessagesBuilder().Append(sourcetransformer.NewMessage(d.Value(), janFirst2022).WithTags([]string{"within_year_2022"})) } else { diff --git a/pkg/sourcetransformer/examples/event_time_filter/impl/filter_test.go b/pkg/sourcetransformer/examples/event_time_filter/impl/filter_test.go index 5d913268..fb8b5b06 100644 --- a/pkg/sourcetransformer/examples/event_time_filter/impl/filter_test.go +++ b/pkg/sourcetransformer/examples/event_time_filter/impl/filter_test.go @@ -66,7 +66,7 @@ func Test_FilterEventTime(t *testing.T) { name: "DatumWithEventTimeBefore2022GetsDropped", input: beforeYear2022Datum{}, expectedOutput: sourcetransformer.Messages{ - sourcetransformer.MessageToDrop(), + sourcetransformer.MessageToDrop(time.Date(2021, 
8, 1, 0, 14, 0, 0, time.UTC)), }, }, { diff --git a/pkg/sourcetransformer/examples/event_time_filter/main.go b/pkg/sourcetransformer/examples/event_time_filter/main.go index 8ff8dccf..760af4cc 100644 --- a/pkg/sourcetransformer/examples/event_time_filter/main.go +++ b/pkg/sourcetransformer/examples/event_time_filter/main.go @@ -4,8 +4,9 @@ import ( "context" "log" + "event_time_filter/impl" + "github.com/numaproj/numaflow-go/pkg/sourcetransformer" - "github.com/numaproj/numaflow-go/pkg/sourcetransformer/examples/event_time_filter/impl" ) func transform(_ context.Context, keys []string, d sourcetransformer.Datum) sourcetransformer.Messages { diff --git a/pkg/sourcetransformer/messaget.go b/pkg/sourcetransformer/messaget.go index e7199a83..282b89ae 100644 --- a/pkg/sourcetransformer/messaget.go +++ b/pkg/sourcetransformer/messaget.go @@ -5,12 +5,7 @@ import ( "time" ) -var ( - DROP = fmt.Sprintf("%U__DROP__", '\\') // U+005C__DROP__ - // Watermark are at millisecond granularity, hence we use epoch(0) - 1 to indicate watermark is not available. - // eventTimeForDrop is used to indicate that the message is dropped hence, excluded from watermark calculation - eventTimeForDrop = time.Unix(0, -int64(time.Millisecond)) -) +var DROP = fmt.Sprintf("%U__DROP__", '\\') // U+005C__DROP__ // Message is used to wrap the data return by SourceTransformer functions. // Compared with Message of other UDFs, source transformer Message contains one more field, @@ -60,9 +55,11 @@ func (m Message) Tags() []string { return m.tags } -// MessageToDrop creates a Message to be dropped -func MessageToDrop() Message { - return Message{eventTime: eventTimeForDrop, value: []byte{}, tags: []string{DROP}} +// MessageToDrop creates a Message to be dropped with eventTime. +// eventTime is required because, even though a message is dropped, it is still considered as being processed, +// hence the watermark should be updated accordingly using the provided event time. 
+func MessageToDrop(eventTime time.Time) Message { + return Message{eventTime: eventTime, value: []byte{}, tags: []string{DROP}} } type Messages []Message diff --git a/pkg/sourcetransformer/options.go b/pkg/sourcetransformer/options.go index f98147a5..8cbc2fb4 100644 --- a/pkg/sourcetransformer/options.go +++ b/pkg/sourcetransformer/options.go @@ -13,7 +13,7 @@ type options struct { // Option is the interface to apply options. type Option func(*options) -func DefaultOptions() *options { +func defaultOptions() *options { return &options{ sockAddr: address, maxMessageSize: defaultMaxMessageSize, diff --git a/pkg/sourcetransformer/server.go b/pkg/sourcetransformer/server.go index 5d6dbeb2..f4bf8775 100644 --- a/pkg/sourcetransformer/server.go +++ b/pkg/sourcetransformer/server.go @@ -19,7 +19,7 @@ type server struct { // NewServer creates a new SourceTransformer server. func NewServer(m SourceTransformer, inputOptions ...Option) numaflow.Server { - opts := DefaultOptions() + opts := defaultOptions() for _, inputOption := range inputOptions { inputOption(opts) } diff --git a/pkg/sourcetransformer/service_test.go b/pkg/sourcetransformer/service_test.go index 5269deb0..d57d937e 100644 --- a/pkg/sourcetransformer/service_test.go +++ b/pkg/sourcetransformer/service_test.go @@ -79,7 +79,7 @@ func TestService_sourceTransformFn(t *testing.T) { { name: "sourceTransform_fn_forward_msg_drop_msg", handler: SourceTransformFunc(func(ctx context.Context, keys []string, datum Datum) Messages { - return MessagesBuilder().Append(MessageToDrop()) + return MessagesBuilder().Append(MessageToDrop(testTime)) }), args: args{ ctx: context.Background(), @@ -93,7 +93,7 @@ func TestService_sourceTransformFn(t *testing.T) { want: &stpb.SourceTransformResponse{ Results: []*stpb.SourceTransformResponse_Result{ { - EventTime: timestamppb.New(eventTimeForDrop), + EventTime: timestamppb.New(testTime), Tags: []string{DROP}, Value: []byte{}, },