Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@
* [ENHANCEMENT] Upgraded container base images to `alpine:3.23`. #7163
* [ENHANCEMENT] Ingester: Instrument Ingester CPU profile with userID for read APIs. #7184
* [ENHANCEMENT] Ingester: Add fetch timeout for Ingester expanded postings cache. #7185
* [ENHANCEMENT] Ingester: Add a feature flag to collect metrics on how expensive unoptimized regex matchers are, and new limits to protect the Ingester query path against expensive unoptimized regex matchers. #7194
* [BUGFIX] Ring: Change DynamoDB KV to retry indefinitely for WatchKey. #7088
* [BUGFIX] Ruler: Add XFunctions validation support. #7111
* [BUGFIX] Querier: propagate Prometheus info annotations in protobuf responses. #7132
Expand Down
24 changes: 24 additions & 0 deletions docs/configuration/config-file-reference.md
Original file line number Diff line number Diff line change
Expand Up @@ -3816,6 +3816,12 @@ instance_limits:
# CLI flag: -ingester.enable-matcher-optimization
[enable_matcher_optimization: <boolean> | default = false]

# Enable regex matcher limits and metrics collection for unoptimized regex
# queries. When enabled, the ingester will track pattern length, label
# cardinality, and total value length for unoptimized regex matchers.
# CLI flag: -ingester.enable-regex-matcher-limits
[enable_regex_matcher_limits: <boolean> | default = false]

query_protection:
rejection:
threshold:
Expand Down Expand Up @@ -4111,6 +4117,24 @@ The `limits_config` configures default and per-tenant limits imposed by Cortex s
# CLI flag: -blocks-storage.tsdb.enable-native-histograms
[enable_native_histograms: <boolean> | default = false]

# Maximum length (in bytes) of an unoptimized regex pattern. This is a
# pre-flight check to reject expensive regex queries. 0 to disable. This is only
# enforced in Ingester.
# CLI flag: -validation.max-regex-pattern-length
[max_regex_pattern_length: <int> | default = 0]

# Maximum cardinality of a label that can be queried with an unoptimized regex
# matcher. If exceeded, the query will be rejected with a limit error. 0 to
# disable. This is only enforced in Ingester.
# CLI flag: -validation.max-label-cardinality-for-unoptimized-regex
[max_label_cardinality_for_unoptimized_regex: <int> | default = 0]

# Maximum total length (in bytes) of all label values combined for an
# unoptimized regex matcher. If exceeded, the query will be rejected with a
# limit error. 0 to disable. This is only enforced in Ingester.
# CLI flag: -validation.max-total-label-value-length-for-unoptimized-regex
[max_total_label_value_length_for_unoptimized_regex: <int> | default = 0]

# The maximum number of active metrics with metadata per user, per ingester. 0
# to disable.
# CLI flag: -ingester.max-metadata-per-user
Expand Down
6 changes: 6 additions & 0 deletions docs/configuration/v1-guarantees.md
Original file line number Diff line number Diff line change
Expand Up @@ -139,3 +139,9 @@ Currently experimental features are:
- `-ingester.active-queried-series-metrics-update-period` metric update interval
- `-ingester.active-queried-series-metrics-window-duration` each HyperLogLog time window size
- `-ingester.active-queried-series-metrics-sample-rate` query sampling rate
- Ingester: Regex Matcher Limits
- Enable regex matcher limits and metrics collection via `-ingester.enable-regex-matcher-limits=true`
- Per-tenant limits for unoptimized regex matchers:
- `-validation.max-regex-pattern-length` (int) - maximum pattern length in bytes
- `-validation.max-label-cardinality-for-unoptimized-regex` (int) - maximum label cardinality
- `-validation.max-total-label-value-length-for-unoptimized-regex` (int) - maximum total length of all label values in bytes
18 changes: 11 additions & 7 deletions pkg/frontend/transport/handler.go
Original file line number Diff line number Diff line change
Expand Up @@ -64,14 +64,16 @@ const (
reasonSeriesLimitStoreGateway = "store_gateway_series_limit"
reasonChunksLimitStoreGateway = "store_gateway_chunks_limit"
reasonBytesLimitStoreGateway = "store_gateway_bytes_limit"
reasonUnOptimizedRegexMatcher = `unoptimized_regex_matcher`

limitTooManySamples = `query processing would load too many samples into memory`
limitTimeRangeExceeded = `the query time range exceeds the limit`
limitResponseSizeExceeded = `the query response size exceeds limit`
limitSeriesFetched = `the query hit the max number of series limit`
limitChunksFetched = `the query hit the max number of chunks limit`
limitChunkBytesFetched = `the query hit the aggregated chunks size limit`
limitDataBytesFetched = `the query hit the aggregated data size limit`
limitTooManySamples = `query processing would load too many samples into memory`
limitTimeRangeExceeded = `the query time range exceeds the limit`
limitResponseSizeExceeded = `the query response size exceeds limit`
limitSeriesFetched = `the query hit the max number of series limit`
limitChunksFetched = `the query hit the max number of chunks limit`
limitChunkBytesFetched = `the query hit the aggregated chunks size limit`
limitDataBytesFetched = `the query hit the aggregated data size limit`
limitUnOptimizedRegexMatcher = `unoptimized regex matcher`

// Store gateway limits.
limitSeriesStoreGateway = `exceeded series limit`
Expand Down Expand Up @@ -585,6 +587,8 @@ func (f *Handler) reportQueryStats(r *http.Request, source, userID string, query
reason = reasonChunksLimitStoreGateway
} else if strings.Contains(errMsg, limitBytesStoreGateway) {
reason = reasonBytesLimitStoreGateway
} else if strings.Contains(errMsg, limitUnOptimizedRegexMatcher) {
reason = reasonUnOptimizedRegexMatcher
}
} else if statusCode == http.StatusServiceUnavailable && error != nil {
errMsg := error.Error()
Expand Down
117 changes: 113 additions & 4 deletions pkg/ingester/ingester.go
Original file line number Diff line number Diff line change
Expand Up @@ -174,6 +174,11 @@ type Config struct {
// instead of being used for postings selection.
EnableMatcherOptimization bool `yaml:"enable_matcher_optimization"`

// Enable regex matcher limits and metrics collection for unoptimized regex queries.
// When enabled, the ingester will track pattern length, label cardinality, and total value length
// for unoptimized regex matchers, and enforce per-tenant limits if configured.
EnableRegexMatcherLimits bool `yaml:"enable_regex_matcher_limits"`

QueryProtection configs.QueryProtection `yaml:"query_protection"`
}

Expand Down Expand Up @@ -205,7 +210,7 @@ func (cfg *Config) RegisterFlags(f *flag.FlagSet) {
f.IntVar(&cfg.MatchersCacheMaxItems, "ingester.matchers-cache-max-items", 0, "Maximum number of entries in the regex matchers cache. 0 to disable.")
f.BoolVar(&cfg.SkipMetadataLimits, "ingester.skip-metadata-limits", true, "If enabled, the metadata API returns all metadata regardless of the limits.")
f.BoolVar(&cfg.EnableMatcherOptimization, "ingester.enable-matcher-optimization", false, "Enable optimization of label matchers when query chunks. When enabled, matchers with low selectivity such as =~.+ are applied lazily during series scanning instead of being used for postings matching.")

f.BoolVar(&cfg.EnableRegexMatcherLimits, "ingester.enable-regex-matcher-limits", false, "Enable regex matcher limits and metrics collection for unoptimized regex queries. When enabled, the ingester will track pattern length, label cardinality, and total value length for unoptimized regex matchers.")
cfg.DefaultLimits.RegisterFlagsWithPrefix(f, "ingester.")
cfg.QueryProtection.RegisterFlagsWithPrefix(f, "ingester.")
}
Expand Down Expand Up @@ -813,7 +818,8 @@ func New(cfg Config, limits *validation.Overrides, registerer prometheus.Registe
i.ingestionRate,
&i.maxInflightPushRequests,
&i.maxInflightQueryRequests,
cfg.BlocksStorageConfig.TSDB.PostingsCache.Blocks.Enabled || cfg.BlocksStorageConfig.TSDB.PostingsCache.Head.Enabled)
cfg.BlocksStorageConfig.TSDB.PostingsCache.Blocks.Enabled || cfg.BlocksStorageConfig.TSDB.PostingsCache.Head.Enabled,
cfg.EnableRegexMatcherLimits)
i.validateMetrics = validation.NewValidateMetrics(registerer)

// Replace specific metrics which we can't directly track but we need to read
Expand Down Expand Up @@ -911,6 +917,7 @@ func NewForFlusher(cfg Config, limits *validation.Overrides, registerer promethe
&i.maxInflightPushRequests,
&i.maxInflightQueryRequests,
cfg.BlocksStorageConfig.TSDB.PostingsCache.Blocks.Enabled || cfg.BlocksStorageConfig.TSDB.PostingsCache.Head.Enabled,
cfg.EnableRegexMatcherLimits,
)

i.TSDBState.shipperIngesterID = "flusher"
Expand Down Expand Up @@ -2428,7 +2435,7 @@ func (i *Ingester) queryStream(ctx context.Context, userID string, req *client.Q
numSeries := 0
totalDataBytes := 0
numChunks := 0
numSeries, numSamples, totalDataBytes, numChunks, err = i.queryStreamChunks(ctx, db, int64(from), int64(through), matchers, shardMatcher, stream)
numSeries, numSamples, totalDataBytes, numChunks, err = i.queryStreamChunks(ctx, userID, db, int64(from), int64(through), matchers, shardMatcher, stream)

if err != nil {
return err
Expand Down Expand Up @@ -2467,14 +2474,116 @@ func (i *Ingester) trackInflightQueryRequest() (func(), error) {
}, nil
}

// isRegexUnOptimized reports whether the given matcher is a regex matcher
// whose pattern could not be optimized by the regex engine, meaning it may
// be expensive to evaluate during postings selection.
func isRegexUnOptimized(m *labels.Matcher) bool {
	if m.Type != labels.MatchRegexp {
		return false
	}
	switch m.Value {
	case ".*", ".+":
		// PostingsForMatchers special-cases these patterns, so they are
		// never evaluated as raw regexes and need no check here.
		return false
	}
	return !m.IsRegexOptimized()
}

// checkRegexMatcherLimits validates unoptimized regex matchers against the
// configured per-tenant limits to protect the query path from expensive
// queries, and records observability metrics (pattern length, label
// cardinality, total label value length) for such matchers.
//
// It returns a validation.LimitError when a configured limit is exceeded,
// or a querier error if the TSDB querier cannot be opened. A failure to
// fetch label values for an individual matcher is tolerated (best effort).
func (i *Ingester) checkRegexMatcherLimits(ctx context.Context, userID string, db *userTSDB, matchers []*labels.Matcher, from, through int64) error {
	// Collect all unoptimized regex matchers upfront, recording the pattern
	// length metric as each one is identified.
	var unoptimizedMatchers []*labels.Matcher
	for _, matcher := range matchers {
		if isRegexUnOptimized(matcher) {
			unoptimizedMatchers = append(unoptimizedMatchers, matcher)
			if i.metrics.unoptimizedRegexPatternLength != nil {
				i.metrics.unoptimizedRegexPatternLength.Observe(float64(len(matcher.Value)))
			}
		}
	}

	if len(unoptimizedMatchers) == 0 {
		return nil
	}

	// Pre-flight check: pattern length requires no TSDB access, so enforce
	// it before opening a querier.
	maxPatternLength := i.limits.MaxRegexPatternLength(userID)
	if maxPatternLength > 0 {
		for _, matcher := range unoptimizedMatchers {
			if patternLength := len(matcher.Value); patternLength > maxPatternLength {
				return validation.LimitError(fmt.Sprintf(
					"regex pattern length %d exceeds limit %d for unoptimized regex matcher %q. Consider using a more specific pattern.",
					patternLength, maxPatternLength, matcher.String(),
				))
			}
		}
	}

	maxCardinality := i.limits.MaxLabelCardinalityForUnoptimizedRegex(userID)
	maxTotalValueLength := i.limits.MaxTotalLabelValueLengthForUnoptimizedRegex(userID)

	// If neither the cardinality/value-length limits nor their metrics are
	// enabled, the label values below would be fetched only to be discarded.
	// Skip opening a querier entirely in that case.
	if maxCardinality <= 0 && maxTotalValueLength <= 0 &&
		i.metrics.unoptimizedRegexLabelCardinality == nil &&
		i.metrics.unoptimizedRegexTotalValueLength == nil {
		return nil
	}

	// Query TSDB to collect cardinality and total value length, both for
	// metrics and for limit enforcement.
	labelQuerier, err := db.Querier(from, through)
	if err != nil {
		return err
	}
	defer labelQuerier.Close()

	for _, matcher := range unoptimizedMatchers {
		labelVals, _, err := labelQuerier.LabelValues(ctx, matcher.Name, nil)
		if err != nil {
			// Best effort: if we can't get label values for this matcher,
			// skip it and continue checking the others.
			continue
		}

		cardinality := len(labelVals)

		// Calculate total length of all values for this label.
		var totalValueLength int
		for _, val := range labelVals {
			totalValueLength += len(val)
		}

		// Always record metrics regardless of whether limits are configured
		// (if metrics are enabled).
		if i.metrics.unoptimizedRegexLabelCardinality != nil {
			i.metrics.unoptimizedRegexLabelCardinality.Observe(float64(cardinality))
		}
		if i.metrics.unoptimizedRegexTotalValueLength != nil {
			i.metrics.unoptimizedRegexTotalValueLength.Observe(float64(totalValueLength))
		}

		// Check limits only if configured.
		if maxCardinality > 0 && cardinality > maxCardinality {
			return validation.LimitError(fmt.Sprintf(
				"label %q has cardinality %d which exceeds limit %d for unoptimized regex matcher %q. Consider using a more specific matcher.",
				matcher.Name, cardinality, maxCardinality, matcher.String(),
			))
		}

		if maxTotalValueLength > 0 && totalValueLength > maxTotalValueLength {
			return validation.LimitError(fmt.Sprintf(
				"label %q has total value length %d bytes (across %d values) which exceeds limit %d for unoptimized regex matcher %q. Consider using a more specific matcher.",
				matcher.Name, totalValueLength, cardinality, maxTotalValueLength, matcher.String(),
			))
		}
	}

	return nil
}

// queryStreamChunks streams metrics from a TSDB. This implements the client.IngesterServer interface
func (i *Ingester) queryStreamChunks(ctx context.Context, db *userTSDB, from, through int64, matchers []*labels.Matcher, sm *storepb.ShardMatcher, stream client.Ingester_QueryStreamServer) (numSeries, numSamples, totalBatchSizeBytes, numChunks int, _ error) {
func (i *Ingester) queryStreamChunks(ctx context.Context, userID string, db *userTSDB, from, through int64, matchers []*labels.Matcher, sm *storepb.ShardMatcher, stream client.Ingester_QueryStreamServer) (numSeries, numSamples, totalBatchSizeBytes, numChunks int, _ error) {
q, err := db.ChunkQuerier(from, through)
if err != nil {
return 0, 0, 0, 0, err
}
defer q.Close()

// Check regex matcher limits before executing query if enabled
if i.cfg.EnableRegexMatcherLimits {
if err := i.checkRegexMatcherLimits(ctx, userID, db, matchers, from, through); err != nil {
return 0, 0, 0, 0, err
}
}

c, err := i.trackInflightQueryRequest()
if err != nil {
return 0, 0, 0, 0, err
Expand Down
Loading
Loading