Skip to content

Commit

Permalink
postpone check until check period starts
Browse files Browse the repository at this point in the history
Given a service with a 24h check interval and an office-hours check period: if this
service is scheduled, for whatever reason, outside office hours, it will be rescheduled
24 hours later, which is again outside office hours.
In the end, this service will never be checked again.

So, when naemon detects a check which could not be run because it is outside the check
period, postpone the check until the next slot in its check period.
  • Loading branch information
sni committed Jan 29, 2025
1 parent c082729 commit 9eed0dc
Show file tree
Hide file tree
Showing 5 changed files with 59 additions and 2 deletions.
26 changes: 26 additions & 0 deletions src/naemon/checks_host.c
Original file line number Diff line number Diff line change
Expand Up @@ -237,6 +237,7 @@ static int run_async_host_check(host *hst, int check_options, double latency)

/* make sure this is a valid time to check the host */
if (check_time_against_period(time(NULL), hst->check_period_ptr) != OK) {
delay_host_if_next_check_is_outside_timeperiod(hst);
return ERROR;
}

Expand Down Expand Up @@ -1393,3 +1394,28 @@ static int determine_host_reachability(host *hst)
log_debug_info(DEBUGL_CHECKS, 2, "No parents were up, so host is UNREACHABLE.\n");
return STATE_UNREACHABLE;
}

/*
 * Ensure the next host check falls into the host's check period.
 *
 * If hst->next_check is set but lies outside hst->check_period_ptr, the
 * check is rescheduled to the time reported by get_next_valid_time(),
 * plus a random offset of up to retained_scheduling_randomize_window.
 * Nothing is done when no next check is set, when the next check is
 * already inside the period, or when no valid time is reported (0).
 */
void delay_host_if_next_check_is_outside_timeperiod(host *hst)
{
	time_t timeperiod_start = time(NULL);

	/* no next check set, nothing to postpone */
	if (hst->next_check == 0) {
		return;
	}

	/* next check already falls into the check period */
	if (check_time_against_period(hst->next_check, hst->check_period_ptr) == OK) {
		return;
	}

	get_next_valid_time(hst->next_check, &timeperiod_start, hst->check_period_ptr);
	/* a result of 0 is treated as "no valid time available" */
	if (timeperiod_start == 0) {
		return;
	}

	/* add random delay, so not all checks start at the same second */
	timeperiod_start += ranged_urand(0, retained_scheduling_randomize_window);

	/* ctime() already terminates its result with '\n', so the format adds none */
	log_debug_info(DEBUGL_CHECKS, 1, "delay next host check for %s until check timeperiod starts: %s", hst->name, ctime(&timeperiod_start));
	schedule_host_check(hst, timeperiod_start, CHECK_OPTION_ALLOW_POSTPONE);
}
3 changes: 3 additions & 0 deletions src/naemon/checks_host.h
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,9 @@ int check_host_dependencies(host *hst, int dependency_type);
/* adjusts current host check attempt when a check is processed */
int adjust_host_check_attempt(host *hst, int is_active);

/* ensure next check falls into check period: if the next check of hst lies
 * outside its check period, postpone it to the next valid time */
void delay_host_if_next_check_is_outside_timeperiod(host *hst);

NAGIOS_END_DECL

#endif
26 changes: 26 additions & 0 deletions src/naemon/checks_service.c
Original file line number Diff line number Diff line change
Expand Up @@ -194,6 +194,7 @@ static void handle_service_check_event(struct nm_event_execution_properties *evp

/* make sure this is a valid time to check the service */
if (check_time_against_period(time(NULL), temp_service->check_period_ptr) == ERROR) {
delay_service_if_next_check_is_outside_timeperiod(temp_service);
return;
}

Expand Down Expand Up @@ -1434,3 +1435,28 @@ static int is_service_result_fresh(service *temp_service, time_t current_time, i

return TRUE;
}

/*
 * Ensure the next service check falls into the service's check period.
 *
 * If svc->next_check is set but lies outside svc->check_period_ptr, the
 * check is rescheduled to the time reported by get_next_valid_time(),
 * plus a random offset of up to retained_scheduling_randomize_window.
 * Nothing is done when no next check is set, when the next check is
 * already inside the period, or when no valid time is reported (0).
 */
void delay_service_if_next_check_is_outside_timeperiod(service *svc)
{
	time_t timeperiod_start = time(NULL);

	/* no next check set, nothing to postpone */
	if (svc->next_check == 0) {
		return;
	}

	/* next check already falls into the check period */
	if (check_time_against_period(svc->next_check, svc->check_period_ptr) == OK) {
		return;
	}

	get_next_valid_time(svc->next_check, &timeperiod_start, svc->check_period_ptr);
	/* a result of 0 is treated as "no valid time available" */
	if (timeperiod_start == 0) {
		return;
	}

	/* add random delay, so not all checks start at the same second */
	timeperiod_start += ranged_urand(0, retained_scheduling_randomize_window);

	/* ctime() already terminates its result with '\n', so the format adds none */
	log_debug_info(DEBUGL_CHECKS, 1, "delay next service check for %s - %s until check timeperiod starts: %s", svc->host_name, svc->description, ctime(&timeperiod_start));
	schedule_service_check(svc, timeperiod_start, CHECK_OPTION_ALLOW_POSTPONE);
}
3 changes: 3 additions & 0 deletions src/naemon/checks_service.h
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,9 @@ int handle_async_service_check_result(service *, check_result *);
/* Immutable, check if service is reachable */
int check_service_dependencies(service *, int);

/* ensure next check falls into check period: if the service's next check lies
 * outside its check period, postpone it to the next valid time */
void delay_service_if_next_check_is_outside_timeperiod(service *);

NAGIOS_END_DECL

#endif
3 changes: 1 addition & 2 deletions src/naemon/objects_timeperiod.c
Original file line number Diff line number Diff line change
Expand Up @@ -661,15 +661,14 @@ int check_time_against_period(time_t test_time, const timeperiod *tperiod)
timerange *temp_timerange = NULL;
time_t midnight = (time_t)0L;

midnight = get_midnight(test_time);

/* if no period was specified, assume the time is good */
if (tperiod == NULL)
return OK;

if (is_time_excluded(test_time, tperiod))
return ERROR;

midnight = get_midnight(test_time);
for (temp_timerange = _get_matching_timerange(test_time, tperiod); temp_timerange != NULL; temp_timerange = temp_timerange->next) {
if (timerange_includes_time(temp_timerange, test_time - midnight))
return OK;
Expand Down

0 comments on commit 9eed0dc

Please sign in to comment.