[Watchdog] Check for ACME failures
This commit is contained in:
		| @@ -5,6 +5,8 @@ trap "kill 0" EXIT | ||||
|  | ||||
| # Prepare | ||||
| BACKGROUND_TASKS=() | ||||
| echo "Waiting for containers to settle..." | ||||
| sleep 10 | ||||
|  | ||||
| if [[ "${USE_WATCHDOG}" =~ ^([nN][oO]|[nN])+$ ]]; then | ||||
|   echo -e "$(date) - USE_WATCHDOG=n, skipping watchdog..." | ||||
| @@ -350,6 +352,38 @@ ratelimit_checks() { | ||||
|   return 1 | ||||
| } | ||||
|  | ||||
| # acme_checks: watch the Redis key ACME_FAIL_TIME and return once it changes. | ||||
| # | ||||
| # The key is initialised to 0 when it is unset. On every poll the current | ||||
| # value is compared with the value seen on the previous poll; a change counts | ||||
| # as one error, an unchanged value decays the error count by one (never below | ||||
| # zero). NOTE(review): presumably the ACME container writes a timestamp to | ||||
| # ACME_FAIL_TIME whenever a run fails -- confirm against the writer. | ||||
| # | ||||
| # Globals written: err_count, diff_c, THRESHOLD, ACME_LOG_STATUS, | ||||
| #                  ACME_LOG_STATUS_PREV, err_c_cur | ||||
| # Returns: always 1, as soon as err_count reaches THRESHOLD. | ||||
| acme_checks() { | ||||
|   err_count=0 | ||||
|   diff_c=0 | ||||
|   THRESHOLD=1 | ||||
|   ACME_LOG_STATUS=$(redis-cli -h redis GET ACME_FAIL_TIME) | ||||
|   if [[ -z "${ACME_LOG_STATUS}" ]]; then | ||||
|     redis-cli -h redis SET ACME_FAIL_TIME 0 | ||||
|     ACME_LOG_STATUS=0 | ||||
|   fi | ||||
|   # Reduce error count by 2 after restarting an unhealthy container. | ||||
|   # Single quotes are required: the handler must read err_count when USR1 | ||||
|   # arrives, not when the trap is installed (where it is always 0). | ||||
|   trap '[ ${err_count} -gt 1 ] && err_count=$(( ${err_count} - 2 ))' USR1 | ||||
|   while [ ${err_count} -lt ${THRESHOLD} ]; do | ||||
|     err_c_cur=${err_count} | ||||
|     ACME_LOG_STATUS_PREV=${ACME_LOG_STATUS} | ||||
|     ACME_LOG_STATUS=$(redis-cli -h redis GET ACME_FAIL_TIME) | ||||
|     # Quote the right-hand side so it is compared literally, not as a glob. | ||||
|     if [[ "${ACME_LOG_STATUS_PREV}" != "${ACME_LOG_STATUS}" ]]; then | ||||
|       err_count=$(( ${err_count} + 1 )) | ||||
|     fi | ||||
|     # No new error during this poll: decay the counter by one, but not below 0. | ||||
|     [ ${err_c_cur} -eq ${err_count} ] && [ ! $((${err_count} - 1)) -lt 0 ] && err_count=$((${err_count} - 1)) diff_c=1 | ||||
|     # Counter moved: hand the signed delta to progress. | ||||
|     [ ${err_c_cur} -ne ${err_count} ] && diff_c=$(( ${err_c_cur} - ${err_count} )) | ||||
|     progress "ACME" ${THRESHOLD} $(( ${THRESHOLD} - ${err_count} )) ${diff_c} | ||||
|     # progress exiting with 10 selects the short 1s re-check; any other status | ||||
|     # waits a randomised 10-39s. NOTE(review): meaning of 10 is defined by | ||||
|     # progress, which is outside this view. | ||||
|     if [[ $? == 10 ]]; then | ||||
|       diff_c=0 | ||||
|       sleep 1 | ||||
|     else | ||||
|       diff_c=0 | ||||
|       sleep $(( ( RANDOM % 30 )  + 10 )) | ||||
|     fi | ||||
|   done | ||||
|   return 1 | ||||
| } | ||||
|  | ||||
| ipv6nat_checks() { | ||||
|   err_count=0 | ||||
|   diff_c=0 | ||||
| @@ -518,6 +552,16 @@ done | ||||
| ) & | ||||
| BACKGROUND_TASKS+=($!) | ||||
|  | ||||
| # Background ACME monitor: run acme_checks forever in its own subshell and, | ||||
| # each time it returns non-zero, log the event and post "acme-tiny" to the | ||||
| # command pipe. The subshell PID is recorded in BACKGROUND_TASKS. | ||||
| ( | ||||
| while :; do | ||||
|   acme_checks || { | ||||
|     log_msg "ACME client hit error limit" | ||||
|     echo acme-tiny > /tmp/com_pipe | ||||
|   } | ||||
| done | ||||
| ) & | ||||
| BACKGROUND_TASKS+=($!) | ||||
|  | ||||
| ( | ||||
| while true; do | ||||
|   if ! ipv6nat_checks; then | ||||
| @@ -567,7 +611,10 @@ while true; do | ||||
|   fi | ||||
|   if [[ ${com_pipe_answer} == "ratelimit" ]]; then | ||||
|     log_msg "At least one ratelimit was applied" | ||||
|     [[ ! -z ${WATCHDOG_NOTIFY_EMAIL} ]] && mail_error "${com_pipe_answer}" "No further information available." | ||||
|     [[ ! -z ${WATCHDOG_NOTIFY_EMAIL} ]] && mail_error "${com_pipe_answer}" "Please see mailcow UI logs for further information." | ||||
|   elif [[ ${com_pipe_answer} == "acme-tiny" ]]; then | ||||
|     log_msg "acme-tiny client returned non-zero exit code" | ||||
|     [[ ! -z ${WATCHDOG_NOTIFY_EMAIL} ]] && mail_error "${com_pipe_answer}" "Please check acme-mailcow for further information." | ||||
|   elif [[ ${com_pipe_answer} =~ .+-mailcow ]] || [[ ${com_pipe_answer} == "ipv6nat-mailcow" ]]; then | ||||
|     kill -STOP ${BACKGROUND_TASKS[*]} | ||||
|     sleep 3 | ||||
|   | ||||
		Reference in New Issue
	
	Block a user