Skip to content

Commit

Permalink
Initial Commit of already existing monitors
Browse files Browse the repository at this point in the history
  • Loading branch information
echo-devnull committed Apr 9, 2021
1 parent 0997bd9 commit 86317ba
Show file tree
Hide file tree
Showing 23 changed files with 909 additions and 1 deletion.
52 changes: 52 additions & 0 deletions OOM_error-variables.tf
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
# Inputs for the "Out Of Memory errors in the logs" monitor (module "redis_oom_error").

# Forwarded to the monitor module as `enabled`.
variable "oom_error_enabled" {
  description = "oom_error in redis"
  type        = bool
  default     = true
}

# Forwarded as `warning_threshold`; the default is 1 matching log entry.
variable "oom_error_warning" {
  type    = number
  default = 1
}

# Forwarded as `critical_threshold` and interpolated into the monitor query;
# the default is 5 matching log entries.
variable "oom_error_critical" {
  type    = number
  default = 5
}

# NOTE(review): the redis_oom_error query uses a fixed last("15m") window and
# does not appear to read this value - confirm whether it is still needed.
variable "oom_error_evaluation_period" {
  type    = string
  default = "last_5m"
}

# Forwarded as `severity`; the README lists critical, major, minor and warning.
variable "oom_error_severity" {
  type    = string
  default = "critical"
}

# Forwarded as `note` to the monitor module.
variable "oom_error_note" {
  type    = string
  default = ""
}

# Forwarded as `docs` to the monitor module.
variable "oom_error_docs" {
  type    = string
  default = ""
}

# When non-empty, takes precedence over var.filter_str in local.oom_error_filter.
variable "oom_error_filter_override" {
  type    = string
  default = ""
}

# Forwarded as `alerting_enabled` to the monitor module.
variable "oom_error_alerting_enabled" {
  type    = bool
  default = true
}

# Forwarded as `type` to module "redis_oom_error".
# NOTE(review): unlike the other inputs in this file, the name carries no
# oom_error_ prefix - confirm that a module-wide "type" input is intended.
variable "type" {
  type    = string
  default = "log alert"
}
39 changes: 39 additions & 0 deletions OOM_error.tf
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
# Effective filter for the OOM monitor. coalesce() returns the first argument
# that is neither null nor an empty string, so the per-monitor override wins
# when it is set and var.filter_str is used otherwise.
# NOTE(review): the redis_oom_error query in this file does not reference this
# local - confirm whether the log query is meant to be filtered.
locals {
  oom_error_filter = coalesce(var.oom_error_filter_override, var.filter_str)
}

# Log-based monitor that fires when Redis "OOM command not allowed" entries
# show up in the logs.
module "redis_oom_error" {
  # NOTE(review): "[email protected]" looks like a redacted git SSH user/host -
  # confirm the real source address before applying.
  source = "[email protected]:kabisa/terraform-datadog-generic-monitor.git?ref=0.5"

  name = "Out Of Memory errors in the logs"
  type = var.type

  # Counts matching log entries across all indexes over a fixed 15 minute
  # window and compares the count against the critical threshold.
  # NOTE(review): var.oom_error_evaluation_period is not used here.
  query              = "logs(\"OOM command not allowed when used memory \\> 'maxmemory'.\").index(\"*\").rollup(\"count\").last(\"15m\") > ${var.oom_error_critical}"
  critical_threshold = var.oom_error_critical
  warning_threshold  = var.oom_error_warning

  # Monitor on/off switches.
  enabled          = var.oom_error_enabled
  alerting_enabled = var.oom_error_alerting_enabled

  # Notification texts. The heredoc body and its terminator stay at column 0
  # so the rendered message carries no leading whitespace.
  recovery_message = "No more OOM error log entries in the last 15 minutes."
  alert_message    = <<EOF
Available memory on ${var.service} has dropped so much we are getting OOM errors in the logging.
See: https://app.datadoghq.eu/logs?query=OOM%20command%20not%20allowed%20when%20used%20memory%20%3E%20%27maxmemory%27.&index=%2A&integration_id=&integration_short_name=&saved_view=19730&from_ts=1617618461039&to_ts=1617704861039&live=true&cols=host%2Cservice&stream_sort=service%2Cdesc&messageDisplay=inline&viz=stream
EOF

  # Metadata and routing shared with the other monitors in this module.
  service              = var.service
  env                  = var.alert_env
  severity             = var.oom_error_severity
  note                 = var.oom_error_note
  docs                 = var.oom_error_docs
  additional_tags      = var.additional_tags
  notification_channel = var.notification_channel

  require_full_window = true
  locked              = var.locked
}
18 changes: 17 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
@@ -1 +1,17 @@
# terraform-datadog-redis
## Severity Levels

| Severity | Description |
| --- | --- |
| critical | P1 |
| major | P2 |
| minor | P3 |
| warning | P4 |

## Timeframes

Available values for the `*_evaluation_period` variables:
```
last_#m (1, 5, 10, 15, or 30)
last_#h (1, 2, or 4)
last_1d
```
47 changes: 47 additions & 0 deletions blocked_clients-variables.tf
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
# Inputs for the "Blocked Clients" monitor (module "redis_blocked").

# Forwarded to the monitor module as `enabled`.
variable "blocked_enabled" {
  description = "blocked in redis"
  type        = bool
  default     = true
}

# Forwarded as `warning_threshold`; the default is 5 blocked clients.
variable "blocked_warning" {
  type    = number
  default = 5
}

# Forwarded as `critical_threshold` and interpolated into the monitor query;
# the default is 10 blocked clients.
variable "blocked_critical" {
  type    = number
  default = 10
}

# Time window placed inside avg(...) at the start of the monitor query.
variable "blocked_evaluation_period" {
  type    = string
  default = "last_5m"
}

# Forwarded as `severity`; the README lists critical, major, minor and warning.
variable "blocked_severity" {
  type    = string
  default = "critical"
}

# Forwarded as `note` to the monitor module.
variable "blocked_note" {
  type    = string
  default = ""
}

# Forwarded as `docs` to the monitor module.
variable "blocked_docs" {
  type    = string
  default = ""
}

# When non-empty, takes precedence over var.filter_str in local.blocked_filter.
variable "blocked_filter_override" {
  type    = string
  default = ""
}

# Forwarded as `alerting_enabled` to the monitor module.
variable "blocked_alerting_enabled" {
  type    = bool
  default = true
}
34 changes: 34 additions & 0 deletions blocked_clients.tf
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
# Effective metric filter for the blocked-clients monitor. coalesce() returns
# the first argument that is neither null nor an empty string, so the
# per-monitor override wins when it is set and var.filter_str is used otherwise.
locals {
  blocked_filter = coalesce(var.blocked_filter_override, var.filter_str)
}

# Metric monitor on redis.clients.blocked.
module "redis_blocked" {
  # NOTE(review): "[email protected]" looks like a redacted git SSH user/host -
  # confirm the real source address before applying.
  source = "[email protected]:kabisa/terraform-datadog-generic-monitor.git?ref=0.5"

  name = "Blocked Clients"

  # Average of redis.clients.blocked over the evaluation period, restricted to
  # local.blocked_filter, compared (>=) against the critical threshold.
  query              = "avg(${var.blocked_evaluation_period}):avg:redis.clients.blocked{${local.blocked_filter}} >= ${var.blocked_critical}"
  critical_threshold = var.blocked_critical
  warning_threshold  = var.blocked_warning

  # Monitor on/off switches.
  enabled          = var.blocked_enabled
  alerting_enabled = var.blocked_alerting_enabled

  # Notification texts.
  alert_message    = "${var.service} is waiting to fill a request with data. Until the data is filled, the client is blocked. Current threshold: {{threshold}} and is currently at {{value}} blocked clients. This could indicate a latency issue or timeouts upstream."
  recovery_message = "${var.service} blocked clients is back to {{value}}"

  # Metadata and routing shared with the other monitors in this module.
  service              = var.service
  env                  = var.alert_env
  severity             = var.blocked_severity
  note                 = var.blocked_note
  docs                 = var.blocked_docs
  additional_tags      = var.additional_tags
  notification_channel = var.notification_channel

  require_full_window = true
  locked              = var.locked
}
34 changes: 34 additions & 0 deletions connected_client.tf
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
# Effective metric filter for the connected-clients monitor. coalesce() returns
# the first argument that is neither null nor an empty string, so the
# per-monitor override wins when it is set and var.filter_str is used otherwise.
locals {
  connected_clients_filter = coalesce(var.connected_clients_filter_override, var.filter_str)
}

# Metric monitor on redis.net.clients.
module "redis_connected_clients" {
  # NOTE(review): "[email protected]" looks like a redacted git SSH user/host -
  # confirm the real source address before applying.
  source = "[email protected]:kabisa/terraform-datadog-generic-monitor.git?ref=0.5"

  name = "Connected Clients"

  # Average of redis.net.clients over the evaluation period, restricted to
  # local.connected_clients_filter, compared (>=) against the critical threshold.
  query = "avg(${var.connected_clients_evaluation_period}):avg:redis.net.clients{${local.connected_clients_filter}} >= ${var.connected_clients_critical}"

  enabled          = var.connected_clients_enabled
  alerting_enabled = var.connected_clients_alerting_enabled

  # {{value}} is a number of clients, not a percentage, so it is rendered
  # without a "%" suffix (matching the recovery message below).
  alert_message    = "Amount of connected clients to ${var.service} has gone above {{threshold}} and is currently {{value}}. This could indicate problems with upstream not responding quickly enough."
  recovery_message = "Amount of connected clients to ${var.service} has recovered to {{value}}"

  # Metadata and routing shared with the other monitors in this module.
  service         = var.service
  env             = var.alert_env
  severity        = var.connected_clients_severity
  note            = var.connected_clients_note
  docs            = var.connected_clients_docs
  additional_tags = var.additional_tags

  notification_channel = var.notification_channel

  require_full_window = true
  locked              = var.locked

  critical_threshold = var.connected_clients_critical
  warning_threshold  = var.connected_clients_warning
}
47 changes: 47 additions & 0 deletions connected_clients-variables.tf
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
# Inputs for the "Connected Clients" monitor (module "redis_connected_clients").

# Forwarded to the monitor module as `enabled`.
variable "connected_clients_enabled" {
  description = "connected_clients in redis"
  type        = bool
  default     = true
}

# Forwarded as `warning_threshold`; the default is 20 connected clients.
variable "connected_clients_warning" {
  type    = number
  default = 20
}

# Forwarded as `critical_threshold` and interpolated into the monitor query;
# the default is 30 connected clients.
variable "connected_clients_critical" {
  type    = number
  default = 30
}

# Time window placed inside avg(...) at the start of the monitor query.
variable "connected_clients_evaluation_period" {
  type    = string
  default = "last_5m"
}

# Forwarded as `severity`; the README lists critical, major, minor and warning.
variable "connected_clients_severity" {
  type    = string
  default = "critical"
}

# Forwarded as `note` to the monitor module.
variable "connected_clients_note" {
  type    = string
  default = ""
}

# Forwarded as `docs` to the monitor module.
variable "connected_clients_docs" {
  type    = string
  default = ""
}

# When non-empty, takes precedence over var.filter_str in
# local.connected_clients_filter.
variable "connected_clients_filter_override" {
  type    = string
  default = ""
}

# Forwarded as `alerting_enabled` to the monitor module.
variable "connected_clients_alerting_enabled" {
  type    = bool
  default = true
}
47 changes: 47 additions & 0 deletions evicted_keys-variables.tf
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
# Inputs for the "Eviction Rate" monitor (module "redis_eviction").

# Forwarded to the monitor module as `enabled`.
variable "eviction_enabled" {
  description = "eviction in redis"
  type        = bool
  default     = true
}

# Forwarded as `warning_threshold`; the default is 10 evictions.
variable "eviction_warning" {
  type    = number
  default = 10
}

# Forwarded as `critical_threshold` and interpolated into the monitor query;
# the default is 20 evictions.
variable "eviction_critical" {
  type    = number
  default = 20
}

# Time window placed inside avg(...) at the start of the monitor query.
variable "eviction_evaluation_period" {
  type    = string
  default = "last_5m"
}

# Forwarded as `severity`; the README lists critical, major, minor and warning.
variable "eviction_severity" {
  type    = string
  default = "critical"
}

# Forwarded as `note` to the monitor module.
variable "eviction_note" {
  type    = string
  default = ""
}

# Forwarded as `docs` to the monitor module.
variable "eviction_docs" {
  type    = string
  default = ""
}

# When non-empty, takes precedence over var.filter_str in local.eviction_filter.
variable "eviction_filter_override" {
  type    = string
  default = ""
}

# Forwarded as `alerting_enabled` to the monitor module.
variable "eviction_alerting_enabled" {
  type    = bool
  default = true
}
34 changes: 34 additions & 0 deletions evicted_keys.tf
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
# Effective metric filter for the eviction monitor. coalesce() returns the
# first argument that is neither null nor an empty string, so the per-monitor
# override wins when it is set and var.filter_str is used otherwise.
locals {
  eviction_filter = coalesce(var.eviction_filter_override, var.filter_str)
}

# Metric monitor on redis.keys.evicted.
module "redis_eviction" {
  # NOTE(review): "[email protected]" looks like a redacted git SSH user/host -
  # confirm the real source address before applying.
  source = "[email protected]:kabisa/terraform-datadog-generic-monitor.git?ref=0.5"

  name = "Eviction Rate"

  # Average of redis.keys.evicted over the evaluation period, restricted to
  # local.eviction_filter, compared (>=) against the critical threshold.
  query              = "avg(${var.eviction_evaluation_period}):avg:redis.keys.evicted{${local.eviction_filter}} >= ${var.eviction_critical}"
  critical_threshold = var.eviction_critical
  warning_threshold  = var.eviction_warning

  # Monitor on/off switches.
  enabled          = var.eviction_enabled
  alerting_enabled = var.eviction_alerting_enabled

  # Notification texts.
  alert_message    = "Memory usage on ${var.service} has gone so high, it needs to start evicting keys. Current threshold: {{threshold}} and is eviction rate: {{value}}"
  recovery_message = "${var.service} is evicting keys at the rate of {{value}}"

  # Metadata and routing shared with the other monitors in this module.
  service              = var.service
  env                  = var.alert_env
  severity             = var.eviction_severity
  note                 = var.eviction_note
  docs                 = var.eviction_docs
  additional_tags      = var.additional_tags
  notification_channel = var.notification_channel

  require_full_window = true
  locked              = var.locked
}
Loading

0 comments on commit 86317ba

Please sign in to comment.