Skip to content

Add flag to skip checkKeys if role == master #985

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 9 commits into from
May 3, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
29 changes: 14 additions & 15 deletions exporter/exporter.go
Original file line number Diff line number Diff line change
Expand Up @@ -87,6 +87,7 @@
BuildInfo BuildInfo
BasicAuthUsername string
BasicAuthPassword string
SkipCheckKeysForRoleMaster bool
}

// NewRedisExporter returns a new exporter of Redis metrics.
Expand Down Expand Up @@ -197,7 +198,7 @@
"expired_stale_perc": "expired_stale_percentage",

// https://github.com/antirez/redis/blob/17bf0b25c1171486e3a1b089f3181fff2bc0d4f0/src/evict.c#L349-L352
// ... the sum of AOF and slaves buffer ....
// ... the sum of AOF and slaves buffer ...
"mem_not_counted_for_evict": "mem_not_counted_for_eviction_bytes",
"mem_total_replication_buffers": "mem_total_replication_buffers_bytes", // Added in Redis 7.0
"mem_overhead_db_hashtable_rehashing": "mem_overhead_db_hashtable_rehashing_bytes", // Added in Redis 7.4
Expand Down Expand Up @@ -720,38 +721,36 @@
}
} else if dbCount == 0 {
// in non-cluster mode, if dbCount is zero then "CONFIG" failed to retrieve a valid
// number of databases and we use the Redis config default which is 16
// number of databases, and we use the Redis config default which is 16

dbCount = 16
}

log.Debugf("dbCount: %d", dbCount)

e.extractInfoMetrics(ch, infoAll, dbCount)
role := e.extractInfoMetrics(ch, infoAll, dbCount)

if !e.options.ExcludeLatencyHistogramMetrics {
e.extractLatencyMetrics(ch, infoAll, c)
}

if e.options.IsCluster {
clusterClient, err := e.connectToRedisCluster()
if err != nil {
log.Errorf("Couldn't connect to redis cluster")
return err
// skip these metrics for master if SkipCheckKeysForRoleMaster is set
// (can help with reducing workload on the master node)
log.Infof("checkKeys metric collection for role: %s flag: %#v", role, e.options.SkipCheckKeysForRoleMaster)
if role == InstanceRoleSlave || !e.options.SkipCheckKeysForRoleMaster {
if err := e.extractCheckKeyMetrics(ch, c); err != nil {
log.Errorf("extractCheckKeyMetrics() err: %s", err)

Check warning on line 742 in exporter/exporter.go

View check run for this annotation

Codecov / codecov/patch

exporter/exporter.go#L742

Added line #L742 was not covered by tests
}
defer clusterClient.Close()

e.extractCheckKeyMetrics(ch, clusterClient)
e.extractCountKeysMetrics(ch, c)

e.extractStreamMetrics(ch, c)
} else {
e.extractCheckKeyMetrics(ch, c)
log.Infof("skipping checkKeys metrics, role: %s flag: %#v", role, e.options.SkipCheckKeysForRoleMaster)
}

e.extractSlowLogMetrics(ch, c)

e.extractStreamMetrics(ch, c)

e.extractCountKeysMetrics(ch, c)

e.extractKeyGroupMetrics(ch, c, dbCount)

if strings.Contains(infoAll, "# Sentinel") {
Expand Down
94 changes: 54 additions & 40 deletions exporter/exporter_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -24,13 +24,12 @@ import (
log "github.com/sirupsen/logrus"
)

var (
dbNumStr = "11"
altDBNumStr = "12"
anotherAltDbNumStr = "14"

const (
dbNumStr = "11"
altDBNumStr = "12"
invalidDBNumStr = "16"
dbNumStrFull = fmt.Sprintf("db%s", dbNumStr)

anotherAltDbNumStr = "14"
)

const (
Expand All @@ -44,11 +43,6 @@ const (
)

var (
testKeySingleString string
testKeys []string
testKeysExpiring []string
testKeysList []string

AllTestKeys = []string{
TestKeysSetName, TestKeysZSetName,
TestKeysStreamName,
Expand All @@ -57,6 +51,21 @@ var (
}
)

// Test key name collections; init() fills them in and appends them to AllTestKeys.
var testKeys, testKeysExpiring, testKeysList []string

// dbNumStrFull is dbNumStr with a "db" prefix, as used when building
// parameters such as check-single-keys.
var dbNumStrFull = fmt.Sprintf("db%s", dbNumStr)

// Names of the individual keys that setupKeys creates and deleteKeys removes.
var (
	// TestKeyNameSingleString stays empty until init() assigns it a
	// timestamped name at runtime.
	TestKeyNameSingleString string

	TestKeyNameSet    = "test-set"
	TestKeyNameStream = "test-stream"
	TestKeyNameHll    = "test-hll"
)

// getTestExporter builds an Exporter for tests, using the "test" namespace
// and a fresh Prometheus registry.
func getTestExporter() *Exporter {
	opts := Options{
		Namespace: "test",
		Registry:  prometheus.NewRegistry(),
	}
	return getTestExporterWithOptions(opts)
}
Expand Down Expand Up @@ -111,30 +120,30 @@ func setupKeys(t *testing.T, c redis.Conn, dbNum string) error {
}
}

if _, err := doRedisCmd(c, "PFADD", TestKeysHllName, "val1"); err != nil {
if _, err := c.Do("PFADD", TestKeyNameHll, "val1"); err != nil {
t.Errorf("PFADD err: %s", err)
return err
}
if _, err := doRedisCmd(c, "PFADD", TestKeysHllName, "val22"); err != nil {
if _, err := c.Do("PFADD", TestKeyNameHll, "val22"); err != nil {
t.Errorf("PFADD err: %s", err)
return err
}
if _, err := doRedisCmd(c, "PFADD", TestKeysHllName, "val333"); err != nil {
if _, err := c.Do("PFADD", TestKeyNameHll, "val333"); err != nil {
t.Errorf("PFADD err: %s", err)
return err
}

if _, err := doRedisCmd(c, "SADD", TestKeysSetName, "test-val-1"); err != nil {
if _, err := c.Do("SADD", TestKeyNameSet, "test-val-1"); err != nil {
t.Errorf("SADD err: %s", err)
return err
}
if _, err := doRedisCmd(c, "SADD", TestKeysSetName, "test-val-2"); err != nil {
if _, err := c.Do("SADD", TestKeyNameSet, "test-val-2"); err != nil {
t.Errorf("SADD err: %s", err)
return err
}

if _, err := doRedisCmd(c, "ZADD", TestKeysZSetName, "12", "test-zzzval-1"); err != nil {
t.Errorf("ZADD err: %s", err)
if _, err := c.Do("SET", TestKeyNameSingleString, "this-is-a-string"); err != nil {
t.Errorf("PFADD err: %s", err)
return err
}
if _, err := doRedisCmd(c, "ZADD", TestKeysZSetName, "23", "test-zzzval-2"); err != nil {
Expand All @@ -146,8 +155,8 @@ func setupKeys(t *testing.T, c redis.Conn, dbNum string) error {
return err
}

if _, err := doRedisCmd(c, "SET", testKeySingleString, "this-is-a-string"); err != nil {
t.Errorf("SET %s err: %s", testKeySingleString, err)
if _, err := doRedisCmd(c, "SET", TestKeyNameSingleString, "this-is-a-string"); err != nil {
t.Errorf("SET %s err: %s", TestKeyNameSingleString, err)
return err
}

Expand All @@ -173,17 +182,15 @@ func setupKeys(t *testing.T, c redis.Conn, dbNum string) error {
}

// Create test streams
doRedisCmd(c, "XGROUP", "CREATE", TestKeysStreamName, TestKeyGroup1, "$", "MKSTREAM")
doRedisCmd(c, "XGROUP", "CREATE", TestKeysStreamName, TestKeyGroup2, "$", "MKSTREAM")
doRedisCmd(c, "XADD", TestKeysStreamName, TestStreamTimestamps[0], "field_1", "str_1")
doRedisCmd(c, "XADD", TestKeysStreamName, TestStreamTimestamps[1], "field_2", "str_2")
c.Do("XGROUP", "CREATE", TestKeyNameStream, "test_group_1", "$", "MKSTREAM")
c.Do("XGROUP", "CREATE", TestKeyNameStream, "test_group_2", "$", "MKSTREAM")
c.Do("XADD", TestKeyNameStream, TestStreamTimestamps[0], "field_1", "str_1")
c.Do("XADD", TestKeyNameStream, TestStreamTimestamps[1], "field_2", "str_2")

// Process messages to assign Consumers to their groups
doRedisCmd(c, "XREADGROUP", "GROUP", TestKeyGroup1, "test_consumer_1", "COUNT", "1", "STREAMS", TestKeysStreamName, ">")
doRedisCmd(c, "XREADGROUP", "GROUP", TestKeyGroup1, "test_consumer_2", "COUNT", "1", "STREAMS", TestKeysStreamName, ">")
doRedisCmd(c, "XREADGROUP", "GROUP", TestKeyGroup2, "test_consumer_1", "COUNT", "1", "STREAMS", TestKeysStreamName, "0")

t.Logf("setupKeys %s - DONE", dbNum)
c.Do("XREADGROUP", "GROUP", "test_group_1", "test_consumer_1", "COUNT", "1", "STREAMS", TestKeyNameStream, ">")
c.Do("XREADGROUP", "GROUP", "test_group_1", "test_consumer_2", "COUNT", "1", "STREAMS", TestKeyNameStream, ">")
c.Do("XREADGROUP", "GROUP", "test_group_2", "test_consumer_1", "COUNT", "1", "STREAMS", TestKeyNameStream, "0")

time.Sleep(time.Millisecond * 100)
return nil
Expand All @@ -198,6 +205,19 @@ func deleteKeys(c redis.Conn, dbNum string) {
for _, key := range AllTestKeys {
doRedisCmd(c, "DEL", key)
}

for _, key := range testKeysExpiring {
c.Do("DEL", key)
}

for _, key := range testKeysList {
c.Do("DEL", key)
}

c.Do("DEL", TestKeyNameHll)
c.Do("DEL", TestKeyNameSet)
c.Do("DEL", TestKeyNameStream)
c.Do("DEL", TestKeyNameSingleString)
}

func setupTestKeys(t *testing.T, uri string) {
Expand Down Expand Up @@ -273,10 +293,11 @@ func deleteTestKeys(t *testing.T, addr string) error {
return nil
}

func deleteTestKeysCluster(addr string) error {
func deleteTestKeysCluster(t *testing.T, addr string) error {
e, _ := NewRedisExporter(addr, Options{})
c, err := e.connectToRedisCluster()
if err != nil {
t.Errorf("couldn't setup redis CLUSTER, err: %s ", err)
return err
}

Expand Down Expand Up @@ -433,15 +454,9 @@ func TestKeysReset(t *testing.T) {
setupTestKeys(t, os.Getenv("TEST_REDIS_URI"))
defer deleteTestKeys(t, os.Getenv("TEST_REDIS_URI"))

chM := make(chan prometheus.Metric, 10000)
go func() {
e.Collect(chM)
close(chM)
}()

body := downloadURL(t, ts.URL+"/metrics")
if !strings.Contains(body, testKeys[0]) {
t.Errorf("Did not found key %q\n%s", testKeys[0], body)
t.Errorf("Did not find key %q\n%s", testKeys[0], body)
}

deleteTestKeys(t, os.Getenv("TEST_REDIS_URI"))
Expand Down Expand Up @@ -538,15 +553,14 @@ func init() {
testKeys = append(testKeys, fmt.Sprintf("key_%s_%d", n, testTimestamp))
}

testKeySingleString = fmt.Sprintf("key_string_%d", testTimestamp)
AllTestKeys = append(AllTestKeys, testKeySingleString)

TestKeyNameSingleString = fmt.Sprintf("key_string_%d", testTimestamp)
testKeysList = append(testKeysList, "test_beatles_list")

for _, n := range []string{"A.J.", "Howie", "Nick", "Kevin", "Brian"} {
testKeysExpiring = append(testKeysExpiring, fmt.Sprintf("key_exp_%s_%d", n, testTimestamp))
}

AllTestKeys = append(AllTestKeys, TestKeyNameSingleString)
AllTestKeys = append(AllTestKeys, testKeys...)
AllTestKeys = append(AllTestKeys, testKeysList...)
AllTestKeys = append(AllTestKeys, testKeysExpiring...)
Expand Down
6 changes: 3 additions & 3 deletions exporter/http_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ func TestHTTPScrapeMetricsEndpoints(t *testing.T) {
defer deleteTestKeys(t, os.Getenv("TEST_PWD_REDIS_URI"))

csk := dbNumStrFull + "=" + url.QueryEscape(testKeys[0]) // check-single-keys
css := dbNumStrFull + "=" + TestKeysStreamName // check-single-streams
css := dbNumStrFull + "=" + TestKeyNameStream // check-single-streams
cntk := dbNumStrFull + "=" + testKeys[0] + "*" // count-keys

u, err := url.Parse(os.Getenv("TEST_REDIS_URI"))
Expand Down Expand Up @@ -240,7 +240,7 @@ func TestSimultaneousMetricsHttpRequests(t *testing.T) {
os.Getenv("TEST_REDIS6_URI"),
os.Getenv("TEST_REDIS_MODULES_URI"),

// tile38 & Cluster need to be last in this list so we can identify them when selected, down in line 229
// tile38 & Cluster need to be last in this list, so we can identify them when selected, down in line 229
os.Getenv("TEST_REDIS_CLUSTER_MASTER_URI"),
os.Getenv("TEST_REDIS_CLUSTER_SLAVE_URI"),
os.Getenv("TEST_TILE38_URI"),
Expand All @@ -262,7 +262,7 @@ func TestSimultaneousMetricsHttpRequests(t *testing.T) {
v.Add("target", target)

// not appending this param for Tile38 and cluster (the last two in the list)
// Tile38 & cluster don't support the SELECT command so this test will fail and spam the logs
// Tile38 & cluster don't support the SELECT command, so this test will fail and spam the logs
if uriIdx < len(uris)-3 {
v.Add("check-single-keys", dbNumStrFull+"="+url.QueryEscape(testKeys[0]))
}
Expand Down
16 changes: 13 additions & 3 deletions exporter/info.go
Original file line number Diff line number Diff line change
Expand Up @@ -33,8 +33,13 @@ var reMasterDirect = regexp.MustCompile(`^(master(_[0-9]+)?_(last_io_seconds_ago
slave0:ip=10.254.11.1,port=6379,state=online,offset=1751844676,lag=0
slave1:ip=10.254.11.2,port=6379,state=online,offset=1751844222,lag=0
*/

var reSlave = regexp.MustCompile(`^slave\d+`)

// InstanceRoleSlave is the "role" value that identifies the scraped instance
// as a replica (slave).
const InstanceRoleSlave = "slave"

func extractVal(s string) (val float64, err error) {
split := strings.Split(s, "=")
if len(split) != 2 {
Expand All @@ -60,7 +65,8 @@ func extractPercentileVal(s string) (percentile float64, val float64, err error)
return
}

func (e *Exporter) extractInfoMetrics(ch chan<- prometheus.Metric, info string, dbCount int) {
// extractInfoMetrics emits the metrics parsed from info and returns the role
// of the instance we're scraping (master or slave).
func (e *Exporter) extractInfoMetrics(ch chan<- prometheus.Metric, info string, dbCount int) string {
keyValues := map[string]string{}
handledDBs := map[string]bool{}
cmdCount := map[string]uint64{}
Expand Down Expand Up @@ -161,8 +167,10 @@ func (e *Exporter) extractInfoMetrics(ch chan<- prometheus.Metric, info string,
}
}

instanceRole := keyValues["role"]

e.registerConstMetricGauge(ch, "instance_info", 1,
keyValues["role"],
instanceRole,
keyValues["redis_version"],
keyValues["redis_build_id"],
keyValues["redis_mode"],
Expand All @@ -174,12 +182,14 @@ func (e *Exporter) extractInfoMetrics(ch chan<- prometheus.Metric, info string,
keyValues["master_replid"],
)

if keyValues["role"] == "slave" {
if instanceRole == InstanceRoleSlave {
e.registerConstMetricGauge(ch, "slave_info", 1,
keyValues["master_host"],
keyValues["master_port"],
keyValues["slave_read_only"])
}

return instanceRole
}

func (e *Exporter) generateCommandLatencySummaries(ch chan<- prometheus.Metric, cmdLatencyMap map[string]map[float64]float64, cmdCount map[string]uint64, cmdSum map[string]float64) {
Expand Down
Loading
Loading