#!/bin/sh
# ----------------------------------------------------------------------------
# /usr/local/nagios/eventhandlers/local_service_restarter - nagios service restarter
#
# Creation   : 2018-01-16 Ansgar Puester
# Last update: $Id$
#
# Copyright (c) 2018 Ansgar Puester, ansgar.puester(at)freenet(dot)de
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
# ----------------------------------------------------------------------------
# Event handler script for restarting a service on the local machine
#
# Usage: local_service_restarter STATE TYPE ATTEMPT SERVICE
#   STATE    service state; only CRITICAL can trigger a restart
#            (OK, WARNING and UNKNOWN are ignored)
#   TYPE     state type, SOFT or HARD
#   ATTEMPT  number of the current check attempt
#   SERVICE  name of the init script below /etc/init.d to restart
#
# Note: This script will only restart the service if the service is
#       retried 3 times (in a "soft" state) or if the service somehow
#       manages to fall into a "hard" error state.
#
# The script always finishes with exit status 0, even if the restart fails
# or is refused.
# ----------------------------------------------------------------------------

# set LOGGING to 'yes' to turn on logging to /tmp/local_service_restarter.log
LOGGING='no'
LOG=/tmp/local_service_restarter.log

# ----------------------------------------------------------------------------
# log_msg - append one line to $LOG, but only when LOGGING is 'yes'
# Arguments: the message text
# ----------------------------------------------------------------------------
log_msg() {
  if [ "$LOGGING" = 'yes' ]; then
    # printf instead of echo: the message may contain caller supplied
    # parameters, and echo's backslash/option handling differs between shells.
    printf '%s\n' "$*" >> "$LOG"
  fi
}

# ----------------------------------------------------------------------------
# is_valid_service_name - check that $1 is a plain file name
# Returns: 0 if $1 is usable as /etc/init.d/<name>, 1 if it is empty,
#          '.' or '..', or contains a '/'
# ----------------------------------------------------------------------------
is_valid_service_name() {
  case "$1" in
    '' | . | .. | */*)
      return 1
      ;;
  esac
  return 0
}

# ----------------------------------------------------------------------------
# restart_service - restart a service through its init script
# Arguments: $1 - service name (init script below /etc/init.d)
#            $2 - optional remark appended to the log message
# Returns:   1 if the name is refused, otherwise the status of the
#            restart command
# ----------------------------------------------------------------------------
restart_service() {
  # The name ends up in a path that is executed via sudo, so never let it
  # leave /etc/init.d and never run sudo on the bare directory.
  if ! is_valid_service_name "$1"; then
    log_msg "Refusing to restart service with invalid name '$1'"
    printf '%s\n' "local_service_restarter: invalid service name '$1'" >&2
    return 1
  fi

  log_msg "Restarting service $1${2:+ $2}..."

  # Call the init script to restart the service.
  # Its standard output is appended to the log file regardless of LOGGING.
  sudo "/etc/init.d/$1" restart >> "$LOG"
}

# ----------------------------------------------------------------------------
# Main
# Arguments: STATE TYPE ATTEMPT SERVICE (see usage in the file header)
# Returns:   always 0
# ----------------------------------------------------------------------------
main() {
  ACT_DATE=$(date)
  log_msg "local_service_restarter: $ACT_DATE Parameter: $*"

  STATE=$1
  TYPE=$2
  ATTEMPT=$3
  SERVICE=$4

  # What state is the service in?
  case "$STATE" in
    OK)
      # The service just came back up, so don't do anything...
      ;;
    WARNING)
      # We don't really care about warning states, since the service is
      # probably still running...
      ;;
    UNKNOWN)
      # We don't know what might be causing an unknown error, so don't do
      # anything...
      ;;
    CRITICAL)
      # Aha! The service appears to have a problem - perhaps we should
      # restart the service...
      # Is this a "soft" or a "hard" state?
      case "$TYPE" in
        # We're in a "soft" state, meaning that Nagios is in the middle of
        # retrying the check before it turns into a "hard" state and
        # contacts get notified...
        SOFT)
          # What check attempt are we on? We don't want to restart the
          # service on the first check, because it may just be a fluke!
          case "$ATTEMPT" in
            # Wait until the check has been tried 3 times before restarting
            # the service. If the check fails on the 4th time (after we
            # restart the service), the state type will turn to "hard" and
            # contacts will be notified of the problem. Hopefully this will
            # restart the service successfully, so the 4th check will result
            # in a "soft" recovery. If that happens no one gets notified
            # because we fixed the problem!
            3)
              restart_service "$SERVICE" '(3rd soft critical state)'
              ;;
          esac
          ;;
        # The service somehow managed to turn into a hard error without
        # getting fixed. It should have been restarted by the code above,
        # but for some reason it didn't. Let's give it one last try, shall
        # we?
        # Note: Contacts have already been notified of a problem with the
        # service at this point (unless you disabled notifications for this
        # service)
        HARD)
          restart_service "$SERVICE"
          ;;
      esac
      ;;
  esac

  # A failed or refused restart must not change the handler's exit status.
  return 0
}

# main always returns 0, so the script's exit status is 0.
main "$@"