-
-
Notifications
You must be signed in to change notification settings - Fork 356
feat(watcher): introduce ListWatchParallel mode to handle large-scale resources #1748
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -6,15 +6,15 @@ | |
|
||
use backon::BackoffBuilder; | ||
use educe::Educe; | ||
use futures::{stream::BoxStream, Stream, StreamExt}; | ||
use futures::{poll, stream::BoxStream, Stream, StreamExt}; | ||
use kube_client::{ | ||
api::{ListParams, Resource, ResourceExt, VersionMatch, WatchEvent, WatchParams}, | ||
core::{metadata::PartialObjectMeta, ObjectList, Selector}, | ||
error::ErrorResponse, | ||
Api, Error as ClientErr, | ||
}; | ||
use serde::de::DeserializeOwned; | ||
use std::{clone::Clone, collections::VecDeque, fmt::Debug, future, time::Duration}; | ||
use std::{clone::Clone, collections::VecDeque, fmt::Debug, future, task::Poll, time::Duration}; | ||
use thiserror::Error; | ||
use tracing::{debug, error, warn}; | ||
|
||
|
@@ -107,6 +107,16 @@ | |
objects: VecDeque<K>, | ||
last_bookmark: Option<String>, | ||
}, | ||
/// Completed the first page of the LIST operation, now transitioning to the watch phase | ||
/// while continuing to list subsequent pages | ||
WatchedInitPage { | ||
continue_token: Option<String>, | ||
objects: VecDeque<K>, | ||
last_bookmark: Option<String>, | ||
#[educe(Debug(ignore))] | ||
stream: BoxStream<'static, kube_client::Result<WatchEvent<K>>>, | ||
stream_events: VecDeque<WatchEvent<K>>, | ||
}, | ||
/// Kubernetes 1.27 Streaming Lists | ||
/// The initial watch is in progress | ||
InitialWatch { | ||
|
@@ -115,6 +125,14 @@ | |
}, | ||
/// The initial LIST was successful, so we should move on to starting the actual watch. | ||
InitListed { resource_version: String }, | ||
/// After completing the list operation, process the backlog of cached events from the watch | ||
/// stream. | ||
WatchedInitListed { | ||
resource_version: String, | ||
#[educe(Debug(ignore))] | ||
stream: BoxStream<'static, kube_client::Result<WatchEvent<K>>>, | ||
stream_events: VecDeque<WatchEvent<K>>, | ||
}, | ||
/// The watch is in progress, from this point we just return events from the server. | ||
/// | ||
/// If the connection is disrupted then we propagate the error but try to restart the watch stream by | ||
|
@@ -183,6 +201,11 @@ | |
/// When using this mode, you can configure the `page_size` on the watcher. | ||
#[default] | ||
ListWatch, | ||
/// List first, Starts watching after retrieving the first page of the list. | ||
/// | ||
/// Suitable for clusters with a large number of resources where list operations are slow. | ||
/// Prevents resource version expiration caused by waiting for the entire list operation to complete before starting the watch. | ||
ListWatchParallel, | ||
/// Kubernetes 1.27 Streaming Lists | ||
/// | ||
/// See [upstream documentation on streaming lists](https://kubernetes.io/docs/reference/using-api/api-concepts/#streaming-lists), | ||
|
@@ -461,11 +484,13 @@ | |
{ | ||
match state { | ||
State::Empty => match wc.initial_list_strategy { | ||
InitialListStrategy::ListWatch => (Some(Ok(Event::Init)), State::InitPage { | ||
continue_token: None, | ||
objects: VecDeque::default(), | ||
last_bookmark: None, | ||
}), | ||
InitialListStrategy::ListWatch | InitialListStrategy::ListWatchParallel => { | ||
(Some(Ok(Event::Init)), State::InitPage { | ||
continue_token: None, | ||
objects: VecDeque::default(), | ||
last_bookmark: None, | ||
}) | ||
} | ||
InitialListStrategy::StreamingList => match api.watch(&wc.to_watch_params(), "0").await { | ||
Ok(stream) => (None, State::InitialWatch { stream }), | ||
Err(err) => { | ||
|
@@ -499,6 +524,110 @@ | |
} | ||
let mut lp = wc.to_list_params(); | ||
lp.continue_token = continue_token; | ||
match api.list(&lp).await { | ||
Ok(list) => { | ||
let last_bookmark = list.metadata.resource_version.filter(|s| !s.is_empty()); | ||
let continue_token = list.metadata.continue_.filter(|s| !s.is_empty()); | ||
if last_bookmark.is_none() && continue_token.is_none() { | ||
return (Some(Err(Error::NoResourceVersion)), State::Empty); | ||
} | ||
match wc.initial_list_strategy { | ||
InitialListStrategy::ListWatch => { | ||
// we have drained the last page - move on to next stage | ||
(None, State::InitPage { | ||
continue_token, | ||
objects: list.items.into_iter().collect(), | ||
last_bookmark, | ||
}) | ||
} | ||
InitialListStrategy::ListWatchParallel => { | ||
// start watch | ||
match api | ||
.watch(&wc.to_watch_params(), &last_bookmark.clone().unwrap()) | ||
.await | ||
{ | ||
Ok(stream) => (None, State::WatchedInitPage { | ||
continue_token, | ||
objects: list.items.into_iter().collect(), | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
|
||
last_bookmark: last_bookmark, | ||
Check failure on line 552 in kube-runtime/src/watcher.rs
|
||
stream, | ||
stream_events: VecDeque::default(), | ||
}), | ||
Err(err) => { | ||
if std::matches!(err, ClientErr::Api(ErrorResponse { code: 403, .. })) { | ||
warn!("watch initlist error with 403: {err:?}"); | ||
} else { | ||
debug!("watch initlist error: {err:?}"); | ||
} | ||
(Some(Err(Error::WatchStartFailed(err))), State::Empty) | ||
} | ||
} | ||
} | ||
_ => unreachable!(), | ||
Check failure on line 566 in kube-runtime/src/watcher.rs
|
||
} | ||
} | ||
Err(err) => { | ||
if std::matches!(err, ClientErr::Api(ErrorResponse { code: 403, .. })) { | ||
warn!("watch list error with 403: {err:?}"); | ||
} else { | ||
debug!("watch list error: {err:?}"); | ||
} | ||
(Some(Err(Error::InitialListFailed(err))), State::Empty) | ||
} | ||
} | ||
} | ||
State::WatchedInitPage { | ||
continue_token, | ||
mut objects, | ||
last_bookmark, | ||
mut stream, | ||
mut stream_events, | ||
} => { | ||
if let Some(next) = objects.pop_front() { | ||
return (Some(Ok(Event::InitApply(next))), State::WatchedInitPage { | ||
continue_token, | ||
objects, | ||
last_bookmark, | ||
stream, | ||
stream_events, | ||
}); | ||
} | ||
// Attempt to asynchronously fetch events from the Watch Stream and cache them. | ||
// If an error occurs at this stage, restart the list operation. | ||
loop { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This is unfortunately awkward and complex 😢 There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This step only buffers incremental events from the Watch stream into memory cache. The polling operation terminates immediately when no new events are available. Since the I/O operations are asynchronous and non-blocking, their overhead is negligible compared to the inherent latency of list operations. Therefore, this does not significantly increase the total list+watch processing time. |
||
let event = poll!(stream.next()); | ||
match event { | ||
Poll::Ready(Some(Ok(WatchEvent::Error(err)))) => { | ||
return (Some(Err(Error::WatchError(err))), State::default()); | ||
} | ||
Poll::Ready(Some(Ok(event))) => { | ||
stream_events.push_back(event); | ||
} | ||
Poll::Ready(Some(Err(err))) => { | ||
return (Some(Err(Error::WatchFailed(err))), State::default()); | ||
} | ||
Poll::Ready(None) => { | ||
// Stream ended, we need to restart the list operation | ||
return (None, State::default()); | ||
} | ||
Poll::Pending => { | ||
break; | ||
} | ||
} | ||
} | ||
// check if we need to perform more pages | ||
if continue_token.is_none() { | ||
if let Some(resource_version) = last_bookmark { | ||
// we have drained the last page - move on to next stage | ||
return (Some(Ok(Event::InitDone)), State::WatchedInitListed { | ||
resource_version, | ||
stream, | ||
stream_events, | ||
}); | ||
} | ||
} | ||
let mut lp = wc.to_list_params(); | ||
lp.continue_token = continue_token; | ||
match api.list(&lp).await { | ||
Ok(list) => { | ||
let last_bookmark = list.metadata.resource_version.filter(|s| !s.is_empty()); | ||
|
@@ -508,10 +637,12 @@ | |
} | ||
// Buffer page here, causing us to return to this enum branch (State::InitPage) | ||
// until the objects buffer has drained | ||
(None, State::InitPage { | ||
(None, State::WatchedInitPage { | ||
continue_token, | ||
objects: list.items.into_iter().collect(), | ||
last_bookmark, | ||
stream, | ||
stream_events, | ||
}) | ||
} | ||
Err(err) => { | ||
|
@@ -589,6 +720,52 @@ | |
} | ||
} | ||
} | ||
State::WatchedInitListed { | ||
resource_version, | ||
stream, | ||
mut stream_events, | ||
} => { | ||
if let Some(event) = stream_events.pop_front() { | ||
match event { | ||
WatchEvent::Added(obj) | WatchEvent::Modified(obj) => { | ||
let resource_version = obj.resource_version().unwrap_or_default(); | ||
return if resource_version.is_empty() { | ||
(Some(Err(Error::NoResourceVersion)), State::default()) | ||
} else { | ||
(Some(Ok(Event::Apply(obj))), State::WatchedInitListed { | ||
resource_version, | ||
stream, | ||
stream_events, | ||
}) | ||
}; | ||
} | ||
WatchEvent::Deleted(obj) => { | ||
let resource_version = obj.resource_version().unwrap_or_default(); | ||
return if resource_version.is_empty() { | ||
(Some(Err(Error::NoResourceVersion)), State::default()) | ||
} else { | ||
(Some(Ok(Event::Delete(obj))), State::WatchedInitListed { | ||
resource_version, | ||
stream, | ||
stream_events, | ||
}) | ||
}; | ||
} | ||
WatchEvent::Bookmark(bm) => { | ||
return (None, State::WatchedInitListed { | ||
resource_version: bm.metadata.resource_version, | ||
stream, | ||
stream_events, | ||
}) | ||
} | ||
_ => unreachable!(), | ||
Check failure on line 761 in kube-runtime/src/watcher.rs
|
||
} | ||
} | ||
(None, State::Watching { | ||
resource_version, | ||
stream, | ||
}) | ||
} | ||
State::Watching { | ||
resource_version, | ||
mut stream, | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Is this unwrap fallible?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
There's a preceding last_bookmark.is_none() check, so this unwrap is guaranteed to be safe here.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The preceding check only fails if
continue
is also none.There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
you are right. I will fix it later