Skip to content

Commit 28ffcb6

Browse files
yhabteaboxzi
authored andcommitted
Retrieve host and service names from Redis
Instead of retrieving the host and service names from the used RDBMs, this commit allows us to query them from Redis. This is done to avoid the overhead of database queries, especially when the host and service names are always to be found in Redis. The previous implementation simply perfomed two database queries with each received entity based on their IDs, but we can perform this operation more efficiently from Redis using the same filtering logic as before. Of course, we now have to maintain more code needed to handle the Redis operations, but this is a trade-off we should be willing to make for performance reasons.
1 parent 0e943ff commit 28ffcb6

File tree

4 files changed

+122
-57
lines changed

4 files changed

+122
-57
lines changed

cmd/icingadb/main.go

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -177,6 +177,7 @@ func run() int {
177177
notificationsSource := notifications.NewNotificationsSource(
178178
ctx,
179179
db,
180+
rc,
180181
logs.GetChildLogger("notifications-source"),
181182
cfg)
182183
notificationsSourceCallback = notificationsSource.Submit

go.mod

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@ require (
1313
github.com/mattn/go-sqlite3 v1.14.32
1414
github.com/okzk/sdnotify v0.0.0-20180710141335-d9becc38acbd
1515
github.com/pkg/errors v0.9.1
16+
github.com/redis/go-redis/v9 v9.10.0
1617
github.com/stretchr/testify v1.11.1
1718
github.com/vbauerster/mpb/v6 v6.0.4
1819
go.uber.org/zap v1.27.0
@@ -34,7 +35,6 @@ require (
3435
github.com/mattn/go-isatty v0.0.20 // indirect
3536
github.com/mattn/go-runewidth v0.0.12 // indirect
3637
github.com/pmezard/go-difflib v1.0.0 // indirect
37-
github.com/redis/go-redis/v9 v9.10.0 // indirect
3838
github.com/rivo/uniseg v0.2.0 // indirect
3939
github.com/ssgreg/journald v1.0.0 // indirect
4040
go.uber.org/multierr v1.11.0 // indirect

pkg/notifications/notifications.go

Lines changed: 31 additions & 56 deletions
Original file line numberDiff line numberDiff line change
@@ -8,12 +8,11 @@ import (
88
"sync"
99
"time"
1010

11-
"github.com/icinga/icinga-go-library/backoff"
1211
"github.com/icinga/icinga-go-library/database"
1312
"github.com/icinga/icinga-go-library/logging"
1413
"github.com/icinga/icinga-go-library/notifications"
1514
"github.com/icinga/icinga-go-library/notifications/event"
16-
"github.com/icinga/icinga-go-library/retry"
15+
"github.com/icinga/icinga-go-library/redis"
1716
"github.com/icinga/icinga-go-library/types"
1817
"github.com/icinga/icinga-go-library/utils"
1918
"github.com/icinga/icingadb/pkg/common"
@@ -41,6 +40,7 @@ type Source struct {
4140
ctxCancel context.CancelFunc
4241

4342
notificationsClient *notifications.Client // The Icinga Notifications client used to interact with the API.
43+
redisClient *redis.Client // redisClient is the Redis client used to fetch host and service names for events.
4444
}
4545

4646
// NewNotificationsSource creates a new Source connected to an existing database and logger.
@@ -49,6 +49,7 @@ type Source struct {
4949
func NewNotificationsSource(
5050
ctx context.Context,
5151
db *database.DB,
52+
rc *redis.Client,
5253
logger *logging.Logger,
5354
cfg notifications.Config,
5455
) *Source {
@@ -61,7 +62,8 @@ func NewNotificationsSource(
6162
db: db,
6263
logger: logger,
6364

64-
rules: &notifications.SourceRulesInfo{Version: notifications.EmptyRulesVersion},
65+
rules: &notifications.SourceRulesInfo{Version: notifications.EmptyRulesVersion},
66+
redisClient: rc,
6567

6668
ctx: ctx,
6769
ctxCancel: ctxCancel,
@@ -136,63 +138,36 @@ func (s *Source) evaluateRulesForObject(ctx context.Context, entity database.Ent
136138
return outRuleIds[:len(outRuleIds):len(outRuleIds)], nil
137139
}
138140

139-
// fetchHostServiceName for a host ID and a potential service ID from the Icinga DB relational database.
140-
func (s *Source) fetchHostServiceName(ctx context.Context, hostId, serviceId, envId types.Binary) (host, service string, err error) {
141-
err = retry.WithBackoff(
142-
ctx,
143-
func(ctx context.Context) error {
144-
queryHost := s.db.Rebind("SELECT name FROM host WHERE id = ? AND environment_id = ?")
145-
err := s.db.QueryRowxContext(ctx, queryHost, hostId, envId).Scan(&host)
146-
if err != nil {
147-
return errors.Wrap(err, "cannot select host")
148-
}
149-
150-
if serviceId != nil {
151-
queryService := s.db.Rebind("SELECT name FROM service WHERE id = ? AND environment_id = ?")
152-
err := s.db.QueryRowxContext(ctx, queryService, serviceId, envId).Scan(&service)
153-
if err != nil {
154-
return errors.Wrap(err, "cannot select service")
155-
}
156-
}
157-
158-
return nil
159-
},
160-
retry.Retryable,
161-
backoff.DefaultBackoff,
162-
retry.Settings{Timeout: retry.DefaultTimeout})
163-
return
164-
}
165-
166141
// buildCommonEvent creates an event.Event based on Host and (optional) Service names.
167142
//
168143
// This function is used by all event builders to create a common event structure that includes the host and service
169144
// names, the absolute URL to the Icinga Web 2 Icinga DB page for the host or service, and the tags for the event.
170145
// Any event type-specific information (like severity, message, etc.) is added by the specific event builders.
171-
func (s *Source) buildCommonEvent(host, service string) (*event.Event, error) {
146+
func (s *Source) buildCommonEvent(rlr *redisLookupResult) (*event.Event, error) {
172147
var (
173148
eventName string
174149
eventUrl *url.URL
175150
eventTags map[string]string
176151
)
177152

178-
if service != "" {
179-
eventName = host + "!" + service
153+
if rlr.ServiceName != "" {
154+
eventName = rlr.HostName + "!" + rlr.ServiceName
180155

181156
eventUrl = s.notificationsClient.JoinIcingaWeb2Path("/icingadb/service")
182-
eventUrl.RawQuery = "name=" + utils.RawUrlEncode(service) + "&host.name=" + utils.RawUrlEncode(host)
157+
eventUrl.RawQuery = "name=" + utils.RawUrlEncode(rlr.ServiceName) + "&host.name=" + utils.RawUrlEncode(rlr.HostName)
183158

184159
eventTags = map[string]string{
185-
"host": host,
186-
"service": service,
160+
"host": rlr.HostName,
161+
"service": rlr.ServiceName,
187162
}
188163
} else {
189-
eventName = host
164+
eventName = rlr.HostName
190165

191166
eventUrl = s.notificationsClient.JoinIcingaWeb2Path("/icingadb/host")
192-
eventUrl.RawQuery = "name=" + utils.RawUrlEncode(host)
167+
eventUrl.RawQuery = "name=" + utils.RawUrlEncode(rlr.HostName)
193168

194169
eventTags = map[string]string{
195-
"host": host,
170+
"host": rlr.HostName,
196171
}
197172
}
198173

@@ -208,19 +183,19 @@ func (s *Source) buildCommonEvent(host, service string) (*event.Event, error) {
208183
// The resulted event will have all the necessary information for a state change event, and must
209184
// not be further modified by the caller.
210185
func (s *Source) buildStateHistoryEvent(ctx context.Context, h *v1history.StateHistory) (*event.Event, error) {
211-
hostName, serviceName, err := s.fetchHostServiceName(ctx, h.HostId, h.ServiceId, h.EnvironmentId)
186+
res, err := s.fetchHostServiceName(ctx, h.HostId, h.ServiceId)
212187
if err != nil {
213-
return nil, errors.Wrap(err, "cannot fetch host/service information")
188+
return nil, err
214189
}
215190

216-
ev, err := s.buildCommonEvent(hostName, serviceName)
191+
ev, err := s.buildCommonEvent(res)
217192
if err != nil {
218-
return nil, errors.Wrapf(err, "cannot build event for %q,%q", hostName, serviceName)
193+
return nil, errors.Wrapf(err, "cannot build event for %q,%q", res.HostName, res.ServiceName)
219194
}
220195

221196
ev.Type = event.TypeState
222197

223-
if serviceName != "" {
198+
if res.ServiceName != "" {
224199
switch h.HardState {
225200
case 0:
226201
ev.Severity = event.SeverityOK
@@ -256,14 +231,14 @@ func (s *Source) buildStateHistoryEvent(ctx context.Context, h *v1history.StateH
256231

257232
// buildDowntimeHistoryEvent from a downtime history entry.
258233
func (s *Source) buildDowntimeHistoryEvent(ctx context.Context, h *v1history.DowntimeHistory) (*event.Event, error) {
259-
hostName, serviceName, err := s.fetchHostServiceName(ctx, h.HostId, h.ServiceId, h.EnvironmentId)
234+
res, err := s.fetchHostServiceName(ctx, h.HostId, h.ServiceId)
260235
if err != nil {
261-
return nil, errors.Wrap(err, "cannot fetch host/service information")
236+
return nil, err
262237
}
263238

264-
ev, err := s.buildCommonEvent(hostName, serviceName)
239+
ev, err := s.buildCommonEvent(res)
265240
if err != nil {
266-
return nil, errors.Wrapf(err, "cannot build event for %q,%q", hostName, serviceName)
241+
return nil, errors.Wrapf(err, "cannot build event for %q,%q", res.HostName, res.ServiceName)
267242
}
268243

269244
if h.HasBeenCancelled.Valid && h.HasBeenCancelled.Bool {
@@ -289,14 +264,14 @@ func (s *Source) buildDowntimeHistoryEvent(ctx context.Context, h *v1history.Dow
289264

290265
// buildFlappingHistoryEvent from a flapping history entry.
291266
func (s *Source) buildFlappingHistoryEvent(ctx context.Context, h *v1history.FlappingHistory) (*event.Event, error) {
292-
hostName, serviceName, err := s.fetchHostServiceName(ctx, h.HostId, h.ServiceId, h.EnvironmentId)
267+
res, err := s.fetchHostServiceName(ctx, h.HostId, h.ServiceId)
293268
if err != nil {
294-
return nil, errors.Wrap(err, "cannot fetch host/service information")
269+
return nil, err
295270
}
296271

297-
ev, err := s.buildCommonEvent(hostName, serviceName)
272+
ev, err := s.buildCommonEvent(res)
298273
if err != nil {
299-
return nil, errors.Wrapf(err, "cannot build event for %q,%q", hostName, serviceName)
274+
return nil, errors.Wrapf(err, "cannot build event for %q,%q", res.HostName, res.ServiceName)
300275
}
301276

302277
if h.PercentStateChangeEnd.Valid {
@@ -320,14 +295,14 @@ func (s *Source) buildFlappingHistoryEvent(ctx context.Context, h *v1history.Fla
320295

321296
// buildAcknowledgementHistoryEvent from an acknowledgment history entry.
322297
func (s *Source) buildAcknowledgementHistoryEvent(ctx context.Context, h *v1history.AcknowledgementHistory) (*event.Event, error) {
323-
hostName, serviceName, err := s.fetchHostServiceName(ctx, h.HostId, h.ServiceId, h.EnvironmentId)
298+
res, err := s.fetchHostServiceName(ctx, h.HostId, h.ServiceId)
324299
if err != nil {
325-
return nil, errors.Wrap(err, "cannot fetch host/service information")
300+
return nil, err
326301
}
327302

328-
ev, err := s.buildCommonEvent(hostName, serviceName)
303+
ev, err := s.buildCommonEvent(res)
329304
if err != nil {
330-
return nil, errors.Wrapf(err, "cannot build event for %q,%q", hostName, serviceName)
305+
return nil, errors.Wrapf(err, "cannot build event for %q,%q", res.HostName, res.ServiceName)
331306
}
332307

333308
if !h.ClearTime.Time().IsZero() {

pkg/notifications/redis_fetch.go

Lines changed: 89 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,89 @@
1+
package notifications
2+
3+
import (
4+
"context"
5+
"encoding/json"
6+
"errors"
7+
"fmt"
8+
"time"
9+
10+
"github.com/icinga/icinga-go-library/backoff"
11+
"github.com/icinga/icinga-go-library/retry"
12+
"github.com/icinga/icinga-go-library/types"
13+
"github.com/redis/go-redis/v9"
14+
)
15+
16+
// fetchHostServiceName retrieves the host and service names from Redis.
17+
//
18+
// It uses either the hostId or/and serviceId to fetch the corresponding names. If both are provided,
19+
// the returned result will contain the host name and the service name accordingly. Otherwise, it will
20+
// only contain the host name.
21+
//
22+
// Internally, it uses the Redis HGet command to fetch the data from the "icinga:host" and "icinga:service" hashes.
23+
// If this operation couldn't be completed within a reasonable time (a hard coded 5 seconds), it will cancel the
24+
// request and return an error indicating that the operation timed out. In case of the serviceId being set, the
25+
// maximum execution time of the Redis HGet commands is 10s (5s for each HGet call).
26+
func (s *Source) fetchHostServiceName(ctx context.Context, hostId, serviceId types.Binary) (*redisLookupResult, error) {
27+
redisHGet := func(typ, field string, out *redisLookupResult) error {
28+
ctx, cancel := context.WithTimeout(ctx, 5*time.Second)
29+
defer cancel()
30+
31+
err := retry.WithBackoff(
32+
ctx,
33+
func(ctx context.Context) error { return s.redisClient.HGet(ctx, "icinga:"+typ, field).Scan(out) },
34+
retry.Retryable,
35+
backoff.DefaultBackoff,
36+
retry.Settings{},
37+
)
38+
if err != nil {
39+
if errors.Is(err, redis.Nil) {
40+
return fmt.Errorf("%s with ID %s not found in Redis", typ, hostId)
41+
}
42+
return fmt.Errorf("failed to fetch %s with ID %s from Redis: %w", typ, field, err)
43+
}
44+
return nil
45+
}
46+
47+
var result redisLookupResult
48+
if err := redisHGet("host", hostId.String(), &result); err != nil {
49+
return nil, err
50+
}
51+
52+
result.HostName = result.Name
53+
result.Name = "" // Clear the name field for the host, as we will fetch the service name next.
54+
55+
if serviceId != nil {
56+
if err := redisHGet("service", serviceId.String(), &result); err != nil {
57+
return nil, err
58+
}
59+
result.ServiceName = result.Name
60+
result.Name = "" // It's not needed anymore, clear it!
61+
}
62+
63+
return &result, nil
64+
}
65+
66+
// redisLookupResult defines the structure of the Redis message we're interested in.
67+
type redisLookupResult struct {
68+
HostName string `json:"-"` // Name of the host (never empty).
69+
ServiceName string `json:"-"` // Name of the service (only set in service context).
70+
71+
// Name is used to retrieve the host or service name from Redis.
72+
// It should not be used for any other purpose apart from within the [Source.fetchHostServiceName] function.
73+
Name string `json:"name"`
74+
}
75+
76+
// UnmarshalBinary implements the [encoding.BinaryUnmarshaler] interface for redisLookupResult.
77+
//
78+
// It unmarshals the binary data of the Redis HGet result into the redisLookupResult struct.
79+
// This is required for the HGet().Scan() usage in the [Source.fetchHostServiceName] function to work correctly.
80+
func (rlr *redisLookupResult) UnmarshalBinary(data []byte) error {
81+
if len(data) == 0 {
82+
return errors.New("empty data received for redisLookupResult")
83+
}
84+
85+
if err := json.Unmarshal(data, rlr); err != nil {
86+
return fmt.Errorf("failed to unmarshal redis result: %w", err)
87+
}
88+
return nil
89+
}

0 commit comments

Comments
 (0)