-
Notifications
You must be signed in to change notification settings - Fork 145
feat: Add GetActivePods to handle/datastore and remove deleted pod from prefix-cache scorer #1376
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -19,6 +19,8 @@ package plugins | |
import ( | ||
"context" | ||
"fmt" | ||
|
||
"k8s.io/apimachinery/pkg/types" | ||
) | ||
|
||
// Handle provides plugins a set of standard data and tools to work with | ||
|
@@ -27,6 +29,9 @@ type Handle interface { | |
Context() context.Context | ||
|
||
HandlePlugins | ||
|
||
// GetActivePods returns a list of all active pods | ||
GetActivePods() []types.NamespacedName | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. nit: In go, it's more idiomatic to use There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Consider returning a |
||
} | ||
|
||
// HandlePlugins defines a set of APIs to work with instantiated plugins | ||
|
@@ -44,10 +49,14 @@ type HandlePlugins interface { | |
GetAllPluginsWithNames() map[string]Plugin | ||
} | ||
|
||
// GetActivePodsFunc is a function that returns a list of all active pods. | ||
type GetActivePodsFunc func() []types.NamespacedName | ||
|
||
// eppHandle is an implementation of the interface plugins.Handle | ||
type eppHandle struct { | ||
ctx context.Context | ||
HandlePlugins | ||
getActivePods GetActivePodsFunc | ||
} | ||
|
||
// Context returns a context the plugins can use, if they need one | ||
|
@@ -84,7 +93,12 @@ func (h *eppHandlePlugins) GetAllPluginsWithNames() map[string]Plugin { | |
return h.plugins | ||
} | ||
|
||
func NewEppHandle(ctx context.Context) Handle { | ||
// GetActivePods returns the list of active pods obtained by calling the handle's getActivePods function | ||
func (h *eppHandle) GetActivePods() []types.NamespacedName { | ||
return h.getActivePods() | ||
} | ||
|
||
func NewEppHandle(ctx context.Context, getActivePods GetActivePodsFunc) Handle { | ||
return &eppHandle{ | ||
ctx: ctx, | ||
HandlePlugins: &eppHandlePlugins{ | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -21,6 +21,7 @@ import ( | |
"encoding/binary" | ||
"encoding/json" | ||
"fmt" | ||
"time" | ||
|
||
"github.com/cespare/xxhash/v2" | ||
k8stypes "k8s.io/apimachinery/pkg/types" | ||
|
@@ -55,6 +56,11 @@ const ( | |
PrefixCachePluginType = "prefix-cache-scorer" | ||
) | ||
|
||
const ( | ||
PodActiveCheckInterval = 1 * time.Minute | ||
PodInactivityTimeout = 5 * time.Minute | ||
) | ||
|
||
var DefaultConfig = Config{ | ||
HashBlockSize: DefaultHashBlockSize, | ||
MaxPrefixBlocksToMatch: DefaultMaxPrefixBlocks, | ||
|
@@ -84,6 +90,7 @@ type podSet map[ServerID]struct{} | |
type Indexer interface { | ||
Get(hash BlockHash) podSet | ||
Add(hashes []BlockHash, server ServerID) | ||
RemovePod(server ServerID) | ||
} | ||
|
||
// BlockHash is a hash of the block of request body. | ||
|
@@ -125,7 +132,7 @@ var _ framework.Scorer = &Plugin{} | |
var _ framework.PostCycle = &Plugin{} | ||
|
||
// PrefixCachePluginFactory defines the factory function for Prefix plugin. | ||
func PrefixCachePluginFactory(name string, rawParameters json.RawMessage, _ plugins.Handle) (plugins.Plugin, error) { | ||
func PrefixCachePluginFactory(name string, rawParameters json.RawMessage, handle plugins.Handle) (plugins.Plugin, error) { | ||
parameters := Config{ | ||
HashBlockSize: DefaultHashBlockSize, | ||
MaxPrefixBlocksToMatch: DefaultMaxPrefixBlocks, | ||
|
@@ -138,7 +145,9 @@ func PrefixCachePluginFactory(name string, rawParameters json.RawMessage, _ plug | |
} | ||
} | ||
|
||
return New(parameters).WithName(name), nil | ||
p := New(parameters).WithName(name) | ||
go p.StartPodActiveWatcher(handle.Context(), handle) | ||
return p, nil | ||
} | ||
|
||
// New initializes a new prefix Plugin and returns its pointer. | ||
|
@@ -239,6 +248,45 @@ func (m *Plugin) matchLongestPrefix(ctx context.Context, hashes []BlockHash) map | |
return res | ||
} | ||
|
||
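// StartPodActiveWatcher runs a blocking loop until ctx is done; callers should run it in its own goroutine. | ||
// On every PodActiveCheckInterval tick it removes from the indexer any previously seen pod that is no longer | ||
// in the active list and was last seen more than PodInactivityTimeout ago. | ||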
func (m *Plugin) StartPodActiveWatcher(ctx context.Context, handle plugins.Handle) { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Can we call this something like |
||
logger := log.FromContext(ctx).V(logutil.VERBOSE) | ||
|
||
ticker := time.NewTicker(PodActiveCheckInterval) | ||
defer ticker.Stop() | ||
|
||
podLastSeen := make(map[ServerID]time.Time) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I am thinking of a much simpler implementation: iterate over current pods in the |
||
|
||
for { | ||
select { | ||
case <-ctx.Done(): | ||
return | ||
case <-ticker.C: | ||
now := time.Now() | ||
activePods := handle.GetActivePods() | ||
|
||
// Track active pods | ||
activeSet := make(map[ServerID]struct{}, len(activePods)) | ||
for _, np := range activePods { | ||
id := ServerID(np) | ||
activeSet[id] = struct{}{} | ||
podLastSeen[id] = now | ||
} | ||
|
||
// Remove stale pods | ||
for pod, lastSeen := range podLastSeen { | ||
if _, stillActive := activeSet[pod]; !stillActive { | ||
if now.Sub(lastSeen) > PodInactivityTimeout { | ||
m.indexer.RemovePod(pod) | ||
delete(podLastSeen, pod) | ||
logger.Info("Removed inactive pod from prefix cache", "pod", pod) | ||
} | ||
} | ||
} | ||
} | ||
} | ||
} | ||
|
||
// hashPrompt divides the prompt into blocks and calculate the prefix cache for each block. | ||
// hash(0) is the hash of the model name, since different models generally don't share prefix cache. | ||
// For block i, hash(i) = hash(block i content, hash(i-1)). | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
No need to add this, we can use `PodList`.