Dynamic splitting by interval for range queries (#6458)

afhassan · web-flow · commit ad49b2e07290 · 2025-02-13T19:57:26.000-08:00
* add limit for range query max splits by interval

Signed-off-by: Ahmed Hassan <afayekhassan@gmail.com>

* Change dynamic interval sharding to take into account vertical sharding

Signed-off-by: Ahmed Hassan <afayekhassan@gmail.com>

* add dynamic sharding based on total days of data fetched for query

Signed-off-by: Ahmed Hassan <afayekhassan@gmail.com>

* add unit tests for dynamicIntervalFn

Signed-off-by: Ahmed Hassan <afayekhassan@gmail.com>

* allow using any base interval size for dynamicIntervalFn

Signed-off-by: Ahmed Hassan <afayekhassan@gmail.com>

* add dynamic query splits to experimental features

Signed-off-by: Ahmed Hassan <afayekhassan@gmail.com>

* rename dynamicIntervalFn unit tests

Signed-off-by: Ahmed Hassan <afayekhassan@gmail.com>

* refactor dynamicIntervalFn to be more readable

Signed-off-by: Ahmed Hassan <afayekhassan@gmail.com>

* add unit tests for getIntervalFromMaxSplits

Signed-off-by: Ahmed Hassan <afayekhassan@gmail.com>

* add unit tests for analyzeDurationFetchedByQuery

Signed-off-by: Ahmed Hassan <afayekhassan@gmail.com>

* fix formatting

Signed-off-by: Ahmed Hassan <afayekhassan@gmail.com>

* update docs

Signed-off-by: Ahmed Hassan <afayekhassan@gmail.com>

* update experimental features

Signed-off-by: Ahmed Hassan <afayekhassan@gmail.com>

* fix dynamic splitting when query range is shorter than base interval

Signed-off-by: Ahmed Hassan <afayekhassan@gmail.com>

* refactor dynamic query splitting into smaller helper functions

Signed-off-by: Ahmed Hassan <afayekhassan@gmail.com>

* use duration instead of int for dynamic query splitting calculation

Signed-off-by: Ahmed Hassan <afayekhassan@gmail.com>

* add comments for getIntervalFromMaxSplits

Signed-off-by: Ahmed Hassan <afayekhassan@gmail.com>

* add ceilDiv helper function

Signed-off-by: Ahmed Hassan <afayekhassan@gmail.com>

* add default max splits by duration fetched

Signed-off-by: Ahmed Hassan <afayekhassan@gmail.com>

* add new unit tests for helper functions

Signed-off-by: Ahmed Hassan <afayekhassan@gmail.com>

* add changelog

Signed-off-by: Ahmed Hassan <afayekhassan@gmail.com>

---------

Signed-off-by: Ahmed Hassan <afayekhassan@gmail.com>
Signed-off-by: Ahmed Hassan <57634502+afhassan@users.noreply.github.com>
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -1,8 +1,8 @@
 # Changelog
 
 ## master / unreleased
+* [FEATURE] Query Frontend: Add dynamic interval size for query splitting. This is enabled by configuring experimental flags `querier.max-shards-per-query` and/or `querier.max-fetched-data-duration-per-query`. The split interval size is dynamically increased to maintain a number of shards and total duration fetched below the configured values. #6458
 * [ENHANCEMENT] Add `compactor.auto-forget-delay` for compactor to auto forget compactors after X minutes without heartbeat. #6533
-
 * [FEATURE] Querier/Ruler: Add `query_partial_data` and `rules_partial_data` limits to allow queries/rules to be evaluated with data from a single zone, if other zones are not available. #6526
 * [ENHANCEMENT] StoreGateway: Emit more histogram buckets on the `cortex_querier_storegateway_refetches_per_query` metric. #6570
 * [ENHANCEMENT] Querier: Apply bytes limiter to LabelNames and LabelValuesForLabelNames. #6568
diff --git a/docs/configuration/config-file-reference.md b/docs/configuration/config-file-reference.md
@@ -4276,6 +4276,22 @@ The `query_range_config` configures the query splitting and caching in the Corte
 # CLI flag: -querier.split-queries-by-interval
 [split_queries_by_interval: <duration> | default = 0s]
 
+dynamic_query_splits:
+  # [EXPERIMENTAL] Maximum number of shards for a query, 0 disables it.
+  # Dynamically uses a multiple of split interval to maintain a total number of
+  # shards below the set value. If vertical sharding is enabled for a query, the
+  # combined total number of interval splits and vertical shards is kept below
+  # this value.
+  # CLI flag: -querier.max-shards-per-query
+  [max_shards_per_query: <int> | default = 0]
+
+  # [EXPERIMENTAL] Max total duration of data fetched from storage by all query
+  # shards, 0 disables it. Dynamically uses a multiple of split interval to
+  # maintain a total fetched duration of data lower than the value set. It takes
+  # into account additional duration fetched by matrix selectors and subqueries.
+  # CLI flag: -querier.max-fetched-data-duration-per-query
+  [max_fetched_data_duration_per_query: <duration> | default = 0s]
+
 # Mutate incoming queries to align their start and end with their step.
 # CLI flag: -querier.align-querier-with-step
 [align_queries_with_step: <boolean> | default = false]
diff --git a/docs/configuration/v1-guarantees.md b/docs/configuration/v1-guarantees.md
@@ -120,3 +120,6 @@ Currently experimental features are:
   - Enable string interning for metrics labels by setting `-ingester.labels-string-interning-enabled` on Ingester.
 - Query-frontend: query rejection (`-frontend.query-rejection.enabled`)
 - Querier: protobuf codec (`-api.querier-default-codec`)
+- Query-frontend: dynamic query splits
+  - `-querier.max-shards-per-query` (int) CLI flag
+  - `-querier.max-fetched-data-duration-per-query` (duration) CLI flag
diff --git a/pkg/frontend/transport/handler.go b/pkg/frontend/transport/handler.go
@@ -372,6 +372,7 @@ func (f *Handler) reportQueryStats(r *http.Request, source, userID string, query
 	splitQueries := stats.LoadSplitQueries()
 	dataSelectMaxTime := stats.LoadDataSelectMaxTime()
 	dataSelectMinTime := stats.LoadDataSelectMinTime()
+	splitInterval := stats.LoadSplitInterval()
 
 	// Track stats.
 	f.querySeconds.WithLabelValues(source, userID).Add(wallTime.Seconds())
@@ -446,6 +447,10 @@ func (f *Handler) reportQueryStats(r *http.Request, source, userID string, query
 		logMessage = append(logMessage, "query_storage_wall_time_seconds", sws)
 	}
 
+	if splitInterval > 0 {
+		logMessage = append(logMessage, "split_interval", splitInterval.String())
+	}
+
 	if error != nil {
 		s, ok := status.FromError(error)
 		if !ok {
diff --git a/pkg/querier/stats/stats.go b/pkg/querier/stats/stats.go
@@ -21,6 +21,7 @@ type QueryStats struct {
 	Priority            int64
 	DataSelectMaxTime   int64
 	DataSelectMinTime   int64
+	SplitInterval       time.Duration
 	m                   sync.Mutex
 }
 
@@ -287,6 +288,14 @@ func (s *QueryStats) LoadDataSelectMinTime() int64 {
 	return atomic.LoadInt64(&s.DataSelectMinTime)
 }
 
+func (s *QueryStats) LoadSplitInterval() time.Duration {
+	if s == nil {
+		return 0
+	}
+
+	return s.SplitInterval
+}
+
 func (s *QueryStats) AddStoreGatewayTouchedPostings(count uint64) {
 	if s == nil {
 		return
diff --git a/pkg/querier/tripperware/queryrange/limits_test.go b/pkg/querier/tripperware/queryrange/limits_test.go
@@ -233,9 +233,10 @@ func TestLimitsMiddleware_MaxQueryLength(t *testing.T) {
 }
 
 type mockLimits struct {
-	maxQueryLookback  time.Duration
-	maxQueryLength    time.Duration
-	maxCacheFreshness time.Duration
+	maxQueryLookback       time.Duration
+	maxQueryLength         time.Duration
+	maxCacheFreshness      time.Duration
+	queryVerticalShardSize int
 }
 
 func (m mockLimits) MaxQueryLookback(string) time.Duration {
@@ -255,7 +256,7 @@ func (m mockLimits) MaxCacheFreshness(string) time.Duration {
 }
 
 func (m mockLimits) QueryVerticalShardSize(userID string) int {
-	return 0
+	return m.queryVerticalShardSize
 }
 
 func (m mockLimits) QueryPriority(userID string) validation.QueryPriority {
diff --git a/pkg/querier/tripperware/queryrange/query_range_middlewares.go b/pkg/querier/tripperware/queryrange/query_range_middlewares.go
@@ -34,11 +34,14 @@ const day = 24 * time.Hour
 
 // Config for query_range middleware chain.
 type Config struct {
-	SplitQueriesByInterval time.Duration `yaml:"split_queries_by_interval"`
-	AlignQueriesWithStep   bool          `yaml:"align_queries_with_step"`
-	ResultsCacheConfig     `yaml:"results_cache"`
-	CacheResults           bool `yaml:"cache_results"`
-	MaxRetries             int  `yaml:"max_retries"`
+	// Query splits config
+	SplitQueriesByInterval   time.Duration            `yaml:"split_queries_by_interval"`
+	DynamicQuerySplitsConfig DynamicQuerySplitsConfig `yaml:"dynamic_query_splits"`
+
+	AlignQueriesWithStep bool `yaml:"align_queries_with_step"`
+	ResultsCacheConfig   `yaml:"results_cache"`
+	CacheResults         bool `yaml:"cache_results"`
+	MaxRetries           int  `yaml:"max_retries"`
 	// List of headers which query_range middleware chain would forward to downstream querier.
 	ForwardHeaders flagext.StringSlice `yaml:"forward_headers_list"`
 
@@ -54,6 +57,7 @@ func (cfg *Config) RegisterFlags(f *flag.FlagSet) {
 	f.BoolVar(&cfg.CacheResults, "querier.cache-results", false, "Cache query results.")
 	f.Var(&cfg.ForwardHeaders, "frontend.forward-headers-list", "List of headers forwarded by the query Frontend to downstream querier.")
 	cfg.ResultsCacheConfig.RegisterFlags(f)
+	cfg.DynamicQuerySplitsConfig.RegisterFlags(f)
 }
 
 // Validate validates the config.
@@ -66,9 +70,25 @@ func (cfg *Config) Validate(qCfg querier.Config) error {
 			return errors.Wrap(err, "invalid ResultsCache config")
 		}
 	}
+	if cfg.DynamicQuerySplitsConfig.MaxShardsPerQuery > 0 || cfg.DynamicQuerySplitsConfig.MaxFetchedDataDurationPerQuery > 0 {
+		if cfg.SplitQueriesByInterval <= 0 {
+			return errors.New("configs under dynamic-query-splits requires that a value for split-queries-by-interval is set.")
+		}
+	}
 	return nil
 }
 
+type DynamicQuerySplitsConfig struct {
+	MaxShardsPerQuery              int           `yaml:"max_shards_per_query"`
+	MaxFetchedDataDurationPerQuery time.Duration `yaml:"max_fetched_data_duration_per_query"`
+}
+
+// RegisterFlags registers flags for dynamic query splits.
+func (cfg *DynamicQuerySplitsConfig) RegisterFlags(f *flag.FlagSet) {
+	f.IntVar(&cfg.MaxShardsPerQuery, "querier.max-shards-per-query", 0, "[EXPERIMENTAL] Maximum number of shards for a query, 0 disables it. Dynamically uses a multiple of split interval to maintain a total number of shards below the set value. If vertical sharding is enabled for a query, the combined total number of interval splits and vertical shards is kept below this value.")
+	f.DurationVar(&cfg.MaxFetchedDataDurationPerQuery, "querier.max-fetched-data-duration-per-query", 0, "[EXPERIMENTAL] Max total duration of data fetched from storage by all query shards, 0 disables it. Dynamically uses a multiple of split interval to maintain a total fetched duration of data lower than the value set. It takes into account additional duration fetched by matrix selectors and subqueries.")
+}
+
 // Middlewares returns list of middlewares that should be applied for range query.
 func Middlewares(
 	cfg Config,
@@ -89,8 +109,11 @@ func Middlewares(
 		queryRangeMiddleware = append(queryRangeMiddleware, tripperware.InstrumentMiddleware("step_align", metrics), StepAlignMiddleware)
 	}
 	if cfg.SplitQueriesByInterval != 0 {
-		staticIntervalFn := func(_ tripperware.Request) time.Duration { return cfg.SplitQueriesByInterval }
-		queryRangeMiddleware = append(queryRangeMiddleware, tripperware.InstrumentMiddleware("split_by_interval", metrics), SplitByIntervalMiddleware(staticIntervalFn, limits, prometheusCodec, registerer))
+		intervalFn := staticIntervalFn(cfg)
+		if cfg.DynamicQuerySplitsConfig.MaxShardsPerQuery > 0 || cfg.DynamicQuerySplitsConfig.MaxFetchedDataDurationPerQuery > 0 {
+			intervalFn = dynamicIntervalFn(cfg, limits, queryAnalyzer, lookbackDelta)
+		}
+		queryRangeMiddleware = append(queryRangeMiddleware, tripperware.InstrumentMiddleware("split_by_interval", metrics), SplitByIntervalMiddleware(intervalFn, limits, prometheusCodec, registerer, lookbackDelta))
 	}
 
 	var c cache.Cache
diff --git a/pkg/querier/tripperware/queryrange/split_by_interval.go b/pkg/querier/tripperware/queryrange/split_by_interval.go
diff --git a/pkg/querier/tripperware/queryrange/split_by_interval_test.go b/pkg/querier/tripperware/queryrange/split_by_interval_test.go
diff --git a/pkg/util/time.go b/pkg/util/time.go

Original file line number	Diff line number	Diff line change
`@@ -233,9 +233,10 @@ func TestLimitsMiddleware_MaxQueryLength(t *testing.T) {`
`233`	`233`	`}`
`234`	`234`
`235`	`235`	`type mockLimits struct {`
`236`		`- maxQueryLookback time.Duration`
`237`		`- maxQueryLength time.Duration`
`238`		`- maxCacheFreshness time.Duration`
	`236`	`+ maxQueryLookback time.Duration`
	`237`	`+ maxQueryLength time.Duration`
	`238`	`+ maxCacheFreshness time.Duration`
	`239`	`+ queryVerticalShardSize int`
`239`	`240`	`}`
`240`	`241`
`241`	`242`	`func (m mockLimits) MaxQueryLookback(string) time.Duration {`
`@@ -255,7 +256,7 @@ func (m mockLimits) MaxCacheFreshness(string) time.Duration {`
`255`	`256`	`}`
`256`	`257`
`257`	`258`	`func (m mockLimits) QueryVerticalShardSize(userID string) int {`
`258`		`- return 0`
	`259`	`+ return m.queryVerticalShardSize`
`259`	`260`	`}`
`260`	`261`
`261`	`262`	`func (m mockLimits) QueryPriority(userID string) validation.QueryPriority {`