-
Notifications
You must be signed in to change notification settings - Fork 9
/
Copy pathcheck_wan.sh
150 lines (137 loc) · 4.67 KB
/
check_wan.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
#!/bin/bash
#
# Uses ICMP ping to check whether the WAN/LAN link is losing packets or responding slowly
#
# by Felipe Ferreira 01/2019
#
# IMPORTANT: this script must be named "check_wan.sh" or "check_lan.sh"
# It should only send an e-mail if an error happens more than X times in a short period of time (so we are sure there is a problem)
#
# COPYRIGHT: This code is released as open source software under the GPL v3 license.
# Everyone is permitted to copy and distribute verbatim copies of this license document, but changing it is not allowed.
#
# TIP: a good way to run this script in the background is to use the command: nohup ./check_wan.sh 50 &
#
# TODO: improve the sendemail to check the down log file timestamp and send only every X minutes
# integrate with centreon clapi to set hosts in downtime (avoid too many e-mails for a network problem)
# Check type ("wan" or "lan"): the part of the script's base name that follows
# the last underscore, with ".sh" removed. Working on the base name (instead of
# the full invocation path) keeps directory components out of the result.
SNAME=$(basename -- "$0")
SNAME=${SNAME##*_}
SNAME=${SNAME//.sh/}
# Start date (MM_DD_YY) and start time (HH_MM); both are used in log paths.
D=$(date +%m_%d_%y)
DH=$(date +%H_%M)
############################
##### EDIT HERE #####
PINGS="5"          # echo requests sent per check
SLEEP="10"         # seconds to wait between checks
ERRORCOUNTCRIT=2   # only after two 'consecutive' errors it should send out email
ADDRESS_WAN=google.it
ADDRESS_LAN=172.31.0.123
# NOTE(review): this value looks like a redacted placeholder -- set a real recipient.
EMAILTO="[email protected]"
##############################
# Optional first argument: slow-response threshold in ms. When it is missing,
# a per-type default is used (44 for wan, 14 for lan).
CRIT=${1:-}
case "$SNAME" in
  wan)
    address=$ADDRESS_WAN
    CRIT=${CRIT:-44}
    ;;
  lan)
    address=$ADDRESS_LAN
    CRIT=${CRIT:-14} # threshold for alert in ms
    ;;
  *)
    echo "ERROR - please check this script name $0 it should be: check_wan.sh or check_lan.sh"
    exit 2
    ;;
esac
# The threshold is compared with -gt later on, which only accepts whole
# numbers; reject anything else up front instead of failing on every check.
case "$CRIT" in
  '' | *[!0-9]*)
    echo "ERROR - the threshold must be a whole number of milliseconds, got: $CRIT"
    exit 2
    ;;
esac
FDIR="/tmp/check_${SNAME}/$D"      # per-day log directory
F="$FDIR/check_${SNAME}.log"       # output of the most recent ping run
FDOWN="$FDIR/_down_$DH.log"        # copy of the ping output kept when a problem is seen
#####################
EMAILSENT=0    # 1 once an alert mail went out; further mails are held back while it is 1
COUNT=0        # number of checks done since the last reset
ERRORCOUNT=0   # number of slow checks counted so far
internet=1 # default to internet is up
# Nothing below can work without the log directory, so stop if it cannot be made.
if ! mkdir -p -- "$FDIR"; then
  echo "ERROR - cannot create log directory $FDIR"
  exit 2
fi
if [ ! -f "$F" ]; then
  touch -- "$F"
fi
#################################################################
# Compose an alert message in "$FDIR/mailtest" and hand it to sendmail.
# Globals:
#   FDIR       (read)          directory that receives the composed message file
#   EMAILTO    (read)          recipient written into the To: header
#   EMAILSENT  (read, written) mail is only sent while this is 0; set to 1 once
#                              sendmail accepted the message
#   ERRORCOUNT (written)       reset to 0 once sendmail accepted the message
# Arguments:
#   $1 - complete subject header line (callers pass "Subject: ...")
#   $2 - message body, may span several lines
# Outputs:
#   "### EMAIL SENT" on stdout after a successful hand-off, a warning on stderr
#   when sendmail reports a failure.
# Returns:
#   0 when the mail was sent or held back, 1 when sendmail failed.
sendme() {
  local subject=$1
  local body=$2
  local mail_file="$FDIR/mailtest"

  # Headers, the blank separator line, then the body. The body is passed as one
  # quoted argument so its line breaks survive and nothing in it is glob-expanded.
  printf '%s\n' \
    "From: [email protected]" \
    "To: $EMAILTO" \
    "$subject" \
    "" \
    "$body" > "$mail_file"

  if [ "$EMAILSENT" -eq 0 ]; then
    # -t makes sendmail take the recipients from the message headers.
    if /usr/sbin/sendmail -t < "$mail_file"; then
      EMAILSENT=1
      ERRORCOUNT=0
      echo "### EMAIL SENT"
    else
      # Leave EMAILSENT at 0 so the next alert tries again instead of the
      # failure being recorded as a delivered mail.
      echo "WARNING - sendmail failed, the alert was not sent" >&2
      return 1
    fi
  fi
}
################################################################
# Upper-case check type ("WAN" / "LAN") for the human-readable alert texts, so
# that a LAN check is not reported as a WAN problem and vice versa.
NET_LABEL=$(printf '%s' "$SNAME" | tr '[:lower:]' '[:upper:]')

echo -e "\nPinging $address slow network threashold is $CRIT ms (saving logs at $FDIR)"
# INFINITE LOOP
# Every pass pings $address $PINGS times and then handles one of these cases:
#   * ping failed          -> on the first failure after a good pass the ping
#                             log is kept as $FDOWN and an alert is mailed
#   * some packets lost    -> the ping log is kept as $FDOWN and an alert is mailed
#   * average RTT > $CRIT  -> ERRORCOUNT is raised; an alert is mailed once it
#                             reaches $ERRORCOUNTCRIT
# sendme only mails while EMAILSENT is 0; EMAILSENT, ERRORCOUNT and COUNT are
# cleared every 50 passes so that alerts can go out again.
while true; do
  # One "down" log per hour of the day.
  DH=$(date +%H)
  FDOWN="$FDIR/_down_$DH.log"
  echo -n "$COUNT - $(date +"%a, %b %d, %r") -- "

  # stderr is captured as well, so the saved log (and the mail body) is not
  # empty when ping fails before sending anything, e.g. on a DNS error.
  if ! ping -c "$PINGS" "$address" > "$F" 2>&1; then
    if [ "$internet" -eq 1 ]; then # edge trigger -- was up now down
      echo -n "Internet DOWN"
      cp -fv "$F" "$FDOWN"
      sendme "Subject: $NET_LABEL Network Problem - INTERNET DOWN" "$(tail -n 5 "$FDOWN")"
    else
      echo "... still down (log at $FDOWN)"
    fi
    internet=0
  else
    if [ "$internet" -eq 0 ]; then # was down and came up
      echo -n "Internet back up -- "
    fi
    internet=1

    # Last line of the ping output: the round-trip summary (min/avg/max/...).
    E2=$(tail -n 1 "$F")

    #check if any packets were lost
    # The percentage must be exactly 0 (or 0.0): a plain "0% packet loss"
    # search would also match 20%, 40%, 60% and 80%.
    P=$(tail -n 2 "$F" | grep -Ec '(^|[^0-9.])0(\.0+)?% packet loss')
    if [ "$P" -ne 1 ]; then
      cp -fv "$F" "$FDOWN"
      E1=$(tail -n 2 "$F" | head -n 1)
      echo "ERROR - We lost some packages! $E1 $E2"
      sendme "Subject: $SNAME Network Problem - losing packages" "$NET_LABEL network is losing packages checked from $HOSTNAME to $address and lost packages at $(date +"%a, %b %d, %r") $(cat "$FDOWN")"
    fi

    #check average ping speed (ms)
    # The values follow the "=" as min/avg/max/...; the second one is the
    # average. Only its whole-millisecond part is kept.
    MS=$(printf '%s\n' "$E2" | awk -F'=' '{ split($2, rtt, "/"); print rtt[2] }')
    MS=${MS%%.*}
    case "$MS" in
      '' | *[!0-9]*)
        # Unknown summary format: skip the speed check for this pass rather
        # than feeding a non-number to the comparisons below.
        echo "WARNING - could not read the average response time from: $E2"
        ;;
      *)
        echo "Average Response Time of Last ${PINGS} pings is $MS ms"
        if [ "$MS" -gt "$CRIT" ]; then
          ERRORCOUNT=$((ERRORCOUNT + 1))
          ERRMSG="$(date +"%a, %b %d, %r") -- ERROR (${ERRORCOUNT}/${ERRORCOUNTCRIT}) - The $SNAME network is too slow, average ${PINGS} from $address pings was ${MS} ms slower then $CRIT threashold"
          echo "$ERRMSG" | tee -a "$FDOWN"
          if [ "$ERRORCOUNT" -ge "$ERRORCOUNTCRIT" ]; then
            sendme "Subject: $SNAME Network Problem slow response (${MS}/${CRIT})" "$ERRMSG"
          fi
        elif [ "$ERRORCOUNT" -gt 1 ]; then
          # Back under the threshold after more than one counted slow check:
          # clear EMAILSENT so that the recovery mail is allowed out.
          # NOTE(review): sendme sets EMAILSENT to 1 again after sending, so
          # later alerts stay held back until the reset below -- confirm that
          # this is intended.
          EMAILSENT=0
          ERRORCOUNT=0
          OKMSG="$(date +"%a, %b %d, %r") -- RECOVERED (${ERRORCOUNT}/${ERRORCOUNTCRIT}) - The $SNAME network is back to normal, average ${PINGS} from $address pings was ${MS} of $CRIT threashold"
          sendme "Subject: $SNAME Network Recovery (${MS}/${CRIT})" "$OKMSG"
        fi
        ;;
    esac
  fi

  #RESET EMAIL SENT EVERY 50 to avoid spamming we alert only every 50 checks (to be improved)
  if [ "$COUNT" -eq 50 ]; then
    echo -e "\n$COUNT is 50, reseting email send and error count"
    EMAILSENT=0
    ERRORCOUNT=0
    COUNT=0
  fi
  sleep "$SLEEP"
  COUNT=$((COUNT + 1))
done