Various changes...

This commit is contained in:
andre.peters
2017-12-09 13:15:24 +01:00
parent 873222d5f8
commit 2519738094
6 changed files with 84 additions and 73 deletions

View File

@@ -28,21 +28,19 @@ progress() {
[[ ${CURRENT} -gt ${TOTAL} ]] && return
[[ ${CURRENT} -lt 0 ]] && CURRENT=0
PERCENT=$(( 200 * ${CURRENT} / ${TOTAL} % 2 + 100 * ${CURRENT} / ${TOTAL} ))
log_msg "${SERVICE} health level: ${PERCENT}% (${CURRENT}/${TOTAL}), health trend: ${DIFF}"
log_data "$(printf "%d,%d,%d,%d" ${PERCENT} ${CURRENT} ${TOTAL} ${DIFF})" "${SERVICE}"
redis-cli -h redis LPUSH WATCHDOG_LOG "{\"time\":\"$(date +%s)\",\"service\":\"${SERVICE}\",\"lvl\":\"${PERCENT}\",\"hpnow\":\"${CURRENT}\",\"hptotal\":\"${TOTAL}\",\"hpdiff\":\"${DIFF}\"}" > /dev/null
log_msg "${SERVICE} health level: ${PERCENT}% (${CURRENT}/${TOTAL}), health trend: ${DIFF}" no_redis
}
log_msg() {
redis-cli -h redis LPUSH WATCHDOG_LOG "{\"time\":\"$(date +%s)\",\"message\":\"$(printf '%s' "${1}")\"}" > /dev/null
if [[ ${2} != "no_redis" ]]; then
redis-cli -h redis LPUSH WATCHDOG_LOG "{\"time\":\"$(date +%s)\",\"message\":\"$(printf '%s' "${1}" | \
tr '%&;$"_[]{}-\r\n' ' ')\"}" > /dev/null
fi
redis-cli -h redis LTRIM WATCHDOG_LOG 0 9999 > /dev/null
echo $(date) $(printf '%s\n' "${1}")
}
log_data() {
[[ -z ${1} ]] && return 1
[[ -z ${2} ]] && return 2
redis-cli -h redis LPUSH WATCHDOG_DATA "{\"time\":\"$(date +%s)\",\"service\":\"data\",\"$(printf '%s' "${2}")\":\"$(printf '%s' "${1}")\"}" > /dev/null
}
function mail_error() {
[[ -z ${1} ]] && return 1
[[ -z ${2} ]] && return 2
@@ -234,27 +232,6 @@ Empty
return 1
}
dns_checks() {
err_count=0
diff_c=0
THRESHOLD=28
# Reduce error count by 2 after restarting an unhealthy container
trap "[ ${err_count} -gt 1 ] && err_count=$(( ${err_count} - 2 ))" USR1
while [ ${err_count} -lt ${THRESHOLD} ]; do
host_ip=$(get_container_ip unbound-mailcow)
err_c_cur=${err_count}
/usr/lib/nagios/plugins/check_dns -H google.com 1>&2; err_count=$(( ${err_count} + ($? * 2)))
/usr/lib/nagios/plugins/check_dns -s ${host_ip} -H google.com 1>&2; err_count=$(( ${err_count} + ($? * 2)))
dig +dnssec org. @${host_ip} | grep -E 'flags:.+ad' 1>&2; err_count=$(( ${err_count} + ($? * 2)))
[ ${err_c_cur} -eq ${err_count} ] && [ ! $((${err_count} - 1)) -lt 0 ] && err_count=$((${err_count} - 1)) diff_c=1
[ ${err_c_cur} -ne ${err_count} ] && diff_c=$(( ${err_c_cur} - ${err_count} ))
progress "Unbound" ${THRESHOLD} $(( ${THRESHOLD} - ${err_count} )) ${diff_c}
diff_c=0
sleep $(( ( RANDOM % 30 ) + 10 ))
done
return 1
}
# Create watchdog agents
(
while true; do
@@ -322,17 +299,6 @@ done
) &
BACKGROUND_TASKS+=($!)
(
while true; do
if ! dns_checks; then
log_msg "Unbound hit error limit"
[[ ! -z ${WATCHDOG_NOTIFY_EMAIL} ]] && mail_error "${WATCHDOG_NOTIFY_EMAIL}" "unbound-mailcow"
#echo unbound-mailcow > /tmp/com_pipe
fi
done
) &
BACKGROUND_TASKS+=($!)
(
while true; do
if ! rspamd_checks; then