#!/bin/sh
# ----------------------------------------------------------------------------
# /usr/local/nagios/eventhandlers/remote_service_restarter - nagios service restarter
#
# Creation   : 2018-01-16 Ansgar Puester
# Last update: $Id$
#
# Copyright (c) 2018 Ansgar Puester, ansgar.puester(at)freenet(dot)de
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
# ----------------------------------------------------------------------------
# Event handler script for restarting a service on a remote machine
#
# Note: This script will only restart the service if the service is
#       retried 3 times (in a "soft" state) or if the service somehow
#       manages to fall into a "hard" error state.
#

# Build nrpe command using this variable
nrpe_command=''

# set LOGGING to 'yes' to turn on logging to /tmp/remote_service_restarter.log
LOGGING='no'

# ----------------------------------------------------------------------------
# Report a fatal error and terminate the script with exit status 1.
#
# Globals:   LOGGING (read), ACT_DATE (read), LOGFILE (read; falls back to
#            /tmp/remote_service_restarter.log when unset or empty)
# Arguments: $1 - message text, appended after the program name and date
# Outputs:   appends one line to the log file, only if LOGGING is 'yes'
# Returns:   never returns; always exits with status 1
# ----------------------------------------------------------------------------
restarter_fatal() {
  if [ "$LOGGING" = 'yes' ]; then
    printf 'remote_service_restarter: %s %s\n' "$ACT_DATE" "$1" \
      >> "${LOGFILE:-/tmp/remote_service_restarter.log}"
  fi
  exit 1
}

# ----------------------------------------------------------------------------
# Get host information from /etc/nagios/restarter.conf
#
# Each config line has colon-separated fields:
#     <host name>:<address>:<port>:<mode>
# <address> is passed to check_nrpe as -H and <port> as -p. If <mode> is
# exactly 'ssl', no extra option is added; any other value adds ' -n'
# (presumably check_nrpe's "no SSL" switch - confirm against its docs).
#
# Globals:   nrpe_command (written), SERVICE (read; becomes the -c argument),
#            RESTARTER_CONF (read; optional alternative config file path),
#            LOGGING / ACT_DATE / LOGFILE (read, via restarter_fatal)
# Arguments: $1 - host name to look up (compared literally with field 1)
# Outputs:   nothing on stdout
# Returns:   0 with nrpe_command set; exits the script with status 1 if the
#            host name is empty, the config file is missing, the host has no
#            entry, or the entry lacks an address or port
# ----------------------------------------------------------------------------
get_host_info() {
  act_host=$1
  # Config file: /etc/nagios/restarter.conf (RESTARTER_CONF may override it)
  conf_file=${RESTARTER_CONF:-/etc/nagios/restarter.conf}
  nrpe_command=''

  if [ ! -f "$conf_file" ]; then
    restarter_fatal "Fatal $conf_file not found"
  fi

  # An empty name would otherwise match a config line with an empty first field.
  if [ -z "$act_host" ]; then
    restarter_fatal "Fatal host $act_host not found in $conf_file"
  fi

  # Compare the first field literally instead of using it as a grep pattern:
  # host names contain dots, which a regular expression treats as wildcards.
  # The first matching entry wins. The '|| [ -n ... ]' part keeps a final
  # line that has no trailing newline.
  host_found='no'
  while IFS=: read -r cf_name cf_addr cf_port cf_mode cf_rest ||
    [ -n "$cf_name" ]; do
    if [ "$cf_name" = "$act_host" ]; then
      host_found='yes'
      break
    fi
  done < "$conf_file"

  if [ "$host_found" != 'yes' ]; then
    restarter_fatal "Fatal host $act_host not found in $conf_file"
  fi

  # Without address and port the resulting command line would be malformed.
  if [ -z "$cf_addr" ] || [ -z "$cf_port" ]; then
    restarter_fatal "Fatal incomplete entry for host $act_host in $conf_file"
  fi

  if [ "$cf_mode" = 'ssl' ]; then
    opt=''
  else
    opt=' -n'
  fi

  # Kept as a single string because the caller runs it via word splitting.
  nrpe_command="/usr/local/nagios/plugins/check_nrpe$opt -H $cf_addr -p $cf_port -c $SERVICE"
}

# ----------------------------------------------------------------------------
# Main
# ----------------------------------------------------------------------------
# Usage: remote_service_restarter STATE TYPE ATTEMPT HOSTNAME SERVICE
#   STATE    - service state; only CRITICAL can trigger a restart
#   TYPE     - state type, SOFT or HARD
#   ATTEMPT  - number of the current check attempt
#   HOSTNAME - host name handed to get_host_info for the config lookup
#   SERVICE  - service name; used in the log text and read by get_host_info
#              when it builds the nrpe command
#
# The script finishes with exit status 0 unless get_host_info terminates it
# earlier; the exit status of the nrpe command itself is not evaluated.
# ----------------------------------------------------------------------------

ACT_DATE=$(date)
LOGFILE=/tmp/remote_service_restarter.log

# Append one line to $LOGFILE, but only when LOGGING is set to 'yes'.
#   $1 - the complete text of the log line
log_msg() {
  if [ "$LOGGING" = 'yes' ]; then
    printf '%s\n' "$1" >> "$LOGFILE"
  fi
}

log_msg "remote_service_restarter: $ACT_DATE Parameter: $*"

STATE=$1
TYPE=$2
ATTEMPT=$3
HOSTNAME=$4
SERVICE=$5

# get host information (fills nrpe_command; SERVICE must already be set)
get_host_info "$HOSTNAME"

# What state is the service in?
case "$STATE" in
  OK)
    # The service just came back up, so don't do anything...
    ;;
  WARNING)
    # We don't really care about warning states, since the service is
    # probably still running...
    ;;
  UNKNOWN)
    # We don't know what might be causing an unknown error, so don't do
    # anything...
    ;;
  CRITICAL)
    # Aha! The service appears to have a problem - perhaps we should restart
    # the service...
    # Is this a "soft" or a "hard" state?
    case "$TYPE" in
      # We're in a "soft" state, meaning that Nagios is in the middle of
      # retrying the check before it turns into a "hard" state and contacts
      # get notified...
      SOFT)
        # What check attempt are we on? We don't want to restart the service
        # on the first check, because it may just be a fluke!
        case "$ATTEMPT" in
          # Wait until the check has been tried 3 times before restarting the
          # service. If the check fails on the 4th time (after we restart the
          # service), the state type will turn to "hard" and contacts will be
          # notified of the problem. Hopefully this will restart the service
          # successfully, so the 4th check will result in a "soft" recovery.
          # If that happens no one gets notified because we fixed the problem!
          3)
            log_msg "Restarting $SERVICE with nrpe command (3rd soft critical state)..."
            log_msg "Using nrpe command: $nrpe_command"
            # Run the nrpe command. It is stored as one string, so the
            # unquoted expansion (word splitting) is intentional here.
            # shellcheck disable=SC2086
            $nrpe_command
            ;;
        esac
        ;;
      # The service somehow managed to turn into a hard error without getting
      # fixed. It should have been restarted by the code above, but for some
      # reason it didn't. Let's give it one last try, shall we?
      # Note: Contacts have already been notified of a problem with the
      # service at this point (unless you disabled notifications for this
      # service)
      HARD)
        log_msg "Restarting $SERVICE with nrpe command..."
        log_msg "$nrpe_command"
        # Run the nrpe command; word splitting is intentional (see above).
        # shellcheck disable=SC2086
        $nrpe_command
        ;;
    esac
    ;;
esac

exit 0