From 9eed0dcf2431ce91daaea1aa884da0211719589a Mon Sep 17 00:00:00 2001
From: Sven Nierlein
Date: Tue, 28 Jan 2025 16:59:17 +0100
Subject: [PATCH] postpone check until check period starts

Given a service with a 24h check interval and an office hours check
period: if this service is scheduled for whatever reason outside office
hours, it will be rescheduled 24 hours later, which is also outside
office hours. In the end, this service will never be checked again.

So, when naemon detects a check which could not be run because it is
outside the check period, postpone the check until the next slot in its
check period.
---
NOTE(review): this patch was re-flowed from a whitespace-collapsed copy.
Leading indentation of context and added lines is assumed to be tabs, and
the nesting depth of the first hunk in each .c file is a guess; confirm
against the target tree or apply with `git apply --ignore-whitespace`.
The debug message in delay_host_if_next_check_is_outside_timeperiod() was
corrected to say "host check" (it previously said "service check").

 src/naemon/checks_host.c        | 26 ++++++++++++++++++++++++++
 src/naemon/checks_host.h        |  3 +++
 src/naemon/checks_service.c     | 26 ++++++++++++++++++++++++++
 src/naemon/checks_service.h     |  3 +++
 src/naemon/objects_timeperiod.c |  3 +--
 5 files changed, 59 insertions(+), 2 deletions(-)

diff --git a/src/naemon/checks_host.c b/src/naemon/checks_host.c
index 5c2588d7f..d04e9e6d4 100644
--- a/src/naemon/checks_host.c
+++ b/src/naemon/checks_host.c
@@ -237,6 +237,7 @@ static int run_async_host_check(host *hst, int check_options, double latency)
 
 	/* make sure this is a valid time to check the host */
 	if (check_time_against_period(time(NULL), hst->check_period_ptr) != OK) {
+		delay_host_if_next_check_is_outside_timeperiod(hst);
 		return ERROR;
 	}
 
@@ -1393,3 +1394,28 @@ static int determine_host_reachability(host *hst)
 	log_debug_info(DEBUGL_CHECKS, 2, "No parents were up, so host is UNREACHABLE.\n");
 	return STATE_UNREACHABLE;
 }
+
+/* ensure next check falls into check period */
+void delay_host_if_next_check_is_outside_timeperiod(host *hst)
+{
+	time_t timeperiod_start = time(NULL);
+
+	if(hst->next_check == 0) {
+		return;
+	}
+
+	if(check_time_against_period(hst->next_check, hst->check_period_ptr) == OK) {
+		return;
+	}
+
+	get_next_valid_time(hst->next_check, &timeperiod_start, hst->check_period_ptr);
+	if(timeperiod_start == 0) {
+		return;
+	}
+
+	// add random delay, so not all checks start at the same second
+	timeperiod_start += ranged_urand(0, retained_scheduling_randomize_window);
+
+	log_debug_info(DEBUGL_CHECKS, 1, "delay next host check for %s until check timeperiod starts: %s\n", hst->name, ctime(&timeperiod_start));
+	schedule_host_check(hst, timeperiod_start, CHECK_OPTION_ALLOW_POSTPONE);
+}
diff --git a/src/naemon/checks_host.h b/src/naemon/checks_host.h
index 2fa9308d8..a020167a9 100644
--- a/src/naemon/checks_host.h
+++ b/src/naemon/checks_host.h
@@ -28,6 +28,9 @@ int check_host_dependencies(host *hst, int dependency_type);
 /* adjusts current host check attempt when a check is processed */
 int adjust_host_check_attempt(host *hst, int is_active);
 
+/* ensure next check falls into check period */
+void delay_host_if_next_check_is_outside_timeperiod(host *);
+
 NAGIOS_END_DECL
 
 #endif
diff --git a/src/naemon/checks_service.c b/src/naemon/checks_service.c
index 4fab8215c..79f3f2f70 100644
--- a/src/naemon/checks_service.c
+++ b/src/naemon/checks_service.c
@@ -194,6 +194,7 @@ static void handle_service_check_event(struct nm_event_execution_properties *evp
 
 			/* make sure this is a valid time to check the service */
 			if (check_time_against_period(time(NULL), temp_service->check_period_ptr) == ERROR) {
+				delay_service_if_next_check_is_outside_timeperiod(temp_service);
 				return;
 			}
 
@@ -1434,3 +1435,28 @@ static int is_service_result_fresh(service *temp_service, time_t current_time, i
 
 	return TRUE;
 }
+
+/* ensure next check falls into check period */
+void delay_service_if_next_check_is_outside_timeperiod(service *svc)
+{
+	time_t timeperiod_start = time(NULL);
+
+	if(svc->next_check == 0) {
+		return;
+	}
+
+	if(check_time_against_period(svc->next_check, svc->check_period_ptr) == OK) {
+		return;
+	}
+
+	get_next_valid_time(svc->next_check, &timeperiod_start, svc->check_period_ptr);
+	if(timeperiod_start == 0) {
+		return;
+	}
+
+	// add random delay, so not all checks start at the same second
+	timeperiod_start += ranged_urand(0, retained_scheduling_randomize_window);
+
+	log_debug_info(DEBUGL_CHECKS, 1, "delay next service check for %s - %s until check timeperiod starts: %s\n", svc->host_name, svc->description, ctime(&timeperiod_start));
+	schedule_service_check(svc, timeperiod_start, CHECK_OPTION_ALLOW_POSTPONE);
+}
diff --git a/src/naemon/checks_service.h b/src/naemon/checks_service.h
index a781c6fd0..0e7b39313 100644
--- a/src/naemon/checks_service.h
+++ b/src/naemon/checks_service.h
@@ -24,6 +24,9 @@ int handle_async_service_check_result(service *, check_result *);
 /* Immutable, check if service is reachable */
 int check_service_dependencies(service *, int);
 
+/* ensure next check falls into check period */
+void delay_service_if_next_check_is_outside_timeperiod(service *);
+
 NAGIOS_END_DECL
 
 #endif
diff --git a/src/naemon/objects_timeperiod.c b/src/naemon/objects_timeperiod.c
index 4a1143b8b..bb43a5fa6 100644
--- a/src/naemon/objects_timeperiod.c
+++ b/src/naemon/objects_timeperiod.c
@@ -661,8 +661,6 @@ int check_time_against_period(time_t test_time, const timeperiod *tperiod)
 	timerange *temp_timerange = NULL;
 	time_t midnight = (time_t)0L;
 
-	midnight = get_midnight(test_time);
-
 	/* if no period was specified, assume the time is good */
 	if (tperiod == NULL)
 		return OK;
@@ -670,6 +668,7 @@ int check_time_against_period(time_t test_time, const timeperiod *tperiod)
 	if (is_time_excluded(test_time, tperiod))
 		return ERROR;
 
+	midnight = get_midnight(test_time);
 	for (temp_timerange = _get_matching_timerange(test_time, tperiod); temp_timerange != NULL; temp_timerange = temp_timerange->next) {
 		if (timerange_includes_time(temp_timerange, test_time - midnight))
 			return OK;