#!/bin/sh
# script to manage nfs in a clustered environment
#
# Invoked with an event name as $1.  Events handled here: startup,
# shutdown, takeip, releaseip, ipreallocated, monitor.  Any other
# event falls through to "exit 0".
#
# Helpers that are called but not defined in this file (loadconfig,
# ctdb_setup_service_state_dir, nfs_callout, nfs_callout_init,
# ctdb_counter_*, die, background_with_logging, update_tickles, ...)
# are expected to be provided by "${CTDB_BASE}/functions".

# Default CTDB_BASE to the parent of the directory containing this script
[ -n "$CTDB_BASE" ] || \
	CTDB_BASE=$(d=$(dirname "$0") ; cd -P "$d" ; dirname "$PWD")

. "${CTDB_BASE}/functions"

# service_name is used by various functions
# shellcheck disable=SC2034
service_name="nfs"

loadconfig
service_state_dir=$(ctdb_setup_service_state_dir) || exit $?

######################################################################

# Run "statd-callout notify" in the background, if the callout is
# executable.  All output of the callout is discarded.
service_reconfigure ()
{
	# Restart lock manager, notify clients
	if [ -x "${CTDB_BASE}/statd-callout" ] ; then
		"${CTDB_BASE}/statd-callout" notify &
	fi >/dev/null 2>&1
}

######################################################################

######################################################
# Check the health of NFS services
#
# Use .check files in $CTDB_NFS_CHECKS_DIR.
# Default is "${CTDB_BASE}/nfs-checks.d/"
#
# Each readable file named NN.<progname>.check is fed on stdin to
# nfs_check_service, with <progname> as its argument.
######################################################
nfs_check_services ()
{
	_dir="${CTDB_NFS_CHECKS_DIR:-${CTDB_BASE}/nfs-checks.d}"

	# Files must end with .check - avoids editor backups, RPM fu, ...
	for _f in "$_dir"/[0-9][0-9].*.check ; do
		# Also skips the literal pattern when nothing matches
		[ -r "$_f" ] || continue

		# Strip the ".check" suffix, then the directory and "NN." prefix
		_t="${_f%.check}"
		_progname="${_t##*/[0-9][0-9].}"

		nfs_check_service "$_progname" <"$_f"
	done
}

######################################################
# Check the health of an NFS service
#
# $1 - progname, passed to rpcinfo (looked up in /etc/rpc)
#
# Reads variables from stdin
#
# Variables are:
#
# * family            - "tcp" or "udp" or space separated list
#                       default: tcp, not used with "service_check_cmd"
# * version           - optional, RPC service version number
#                       default is to omit it, which checks for any
#                       version; not used with "service_check_cmd"
# * unhealthy_after   - number of check fails before unhealthy
#                       default: 1
# * restart_every     - number of check fails before restart
#                       default: 0, meaning no restart
# * service_stop_cmd  - command to stop service
#                       default: no default, must be provided if
#                       restart_every > 0
# * service_start_cmd - command to start service
#                       default: no default, must be provided if
#                       restart_every > 0
# * service_check_cmd - command to check health of service
#                       default is to check RPC service using rpcinfo
# * service_debug_cmd - command to debug a service after trying to stop it;
#                       for example, it can be useful to print stack
#                       traces of threads that have not exited, since
#                       they may be stuck doing I/O;
#                       no default, see also function program_stack_traces()
#
# Quoting in values is not preserved
#
# Exit status: returns 0 if the service is healthy, or has failed
# fewer than unhealthy_after times.  Otherwise the whole script
# exits with status 1 (also on an unknown variable in the input).
######################################################
nfs_check_service ()
{
	_progname="$1"

	# This sub-shell is created to intentionally limit the scope of
	# variable values read from the .check files.
	# shellcheck disable=SC2030
	(
		# Subshell to restrict scope variables...

		# Defaults
		family="tcp"
		version=""
		unhealthy_after=1
		restart_every=0
		service_stop_cmd=""
		service_start_cmd=""
		service_check_cmd=""
		service_debug_cmd=""

		# Eval line-by-line.  Expands variable references in values.
		# Also allows variable name checking, which seems useful.
		# NOTE(review): "read" is deliberately left without -r so that
		# backslash handling of existing .check files does not change.
		# shellcheck disable=SC2162
		while read _line ; do
			case "$_line" in
			\#*|"")
				: # Ignore comments, blank lines
				;;
			family=*|version=*|\
			unhealthy_after=*|restart_every=*|\
			service_stop_cmd=*|service_start_cmd=*|\
			service_check_cmd=*|service_debug_cmd=*)
				eval "$_line"
				;;
			*)
				echo "ERROR: Unknown variable for ${_progname}: ${_line}"
				exit 1
				;;
			esac
		done

		# Name under which the failure counter is kept
		_service_name="nfs_${_progname}"

		_ok=false
		if [ -n "$service_check_cmd" ] ; then
			# Using eval means variables can contain semicolon
			# separated commands
			if eval "$service_check_cmd" ; then
				_ok=true
			else
				_err="monitoring service \"${_progname}\" failed"
			fi
		else
			# Only a redirection, no pipeline: nfs_check_rpcinfo runs
			# in this shell, so $ctdb_check_rpc_out is visible below
			if nfs_check_rpcinfo \
				   "$_progname" "$version" "$family" >/dev/null ; then
				_ok=true
			else
				_err="$ctdb_check_rpc_out"
			fi
		fi

		if $_ok ; then
			# The failure counter only matters when failures are
			# being counted towards "unhealthy" or "restart"
			if [ "$unhealthy_after" -ne 1 ] || \
				   [ "$restart_every" -ne 0 ] ; then
				ctdb_counter_init "$_service_name"
			fi
			exit 0
		fi

		ctdb_counter_incr "$_service_name"
		_failcount=$(ctdb_counter_get "$_service_name")

		_unhealthy=false
		if [ "$unhealthy_after" -gt 0 ] ; then
			if [ "$_failcount" -ge "$unhealthy_after" ] ; then
				_unhealthy=true
				echo "ERROR: $_err"
			fi
		fi

		if [ "$restart_every" -gt 0 ] ; then
			# Restart on every restart_every-th consecutive failure
			if [ $((_failcount % restart_every)) -eq 0 ] ; then
				# Avoid reporting the same failure twice
				if ! $_unhealthy ; then
					echo "WARNING: $_err"
				fi
				nfs_restart_service
			fi
		fi

		if $_unhealthy ; then
			exit 1
		fi

		exit 0
	) || exit 1
}

# Stop, optionally debug, then start the service being checked.
#
# Uses: service_stop_cmd, service_start_cmd, service_debug_cmd
#
# Calls die if either the stop or the start command is empty.  The
# start command is run via background_with_logging.
#
# This function is called within the sub-shell that shellcheck thinks
# loses the above variable values.
# shellcheck disable=SC2031
nfs_restart_service ()
{
	if [ -z "$service_stop_cmd" ] || [ -z "$service_start_cmd" ] ; then
		die "ERROR: Can not restart service \"${_progname}\" without corresponding service_start_cmd/service_stop_cmd settings"
	fi

	echo "Trying to restart service \"${_progname}\"..."
	# Using eval means variables can contain semicolon separated commands
	eval "$service_stop_cmd"
	if [ -n "$service_debug_cmd" ] ; then
		eval "$service_debug_cmd"
	fi
	background_with_logging eval "$service_start_cmd"
}

######################################################
# Check an RPC service with rpcinfo
#
# On failure, sets ctdb_check_rpc_out to
# "<progname> failed RPC check: <rpcinfo output>", echoes it and
# returns 1.  On success, ctdb_check_rpc_out holds the rpcinfo
# output and nothing is echoed.
######################################################
ctdb_check_rpc ()
{
	_progname="$1"        # passed to rpcinfo (looked up in /etc/rpc)
	_version="$2"         # optional, not passed if empty/unset
	_family="${3:-tcp}"   # optional, default is "tcp"

	# IPv6 transports are queried via the IPv6 loopback address
	case "$_family" in
	tcp6|udp6)
		_localhost="${CTDB_RPCINFO_LOCALHOST6:-::1}"
		;;
	*)
		_localhost="${CTDB_RPCINFO_LOCALHOST:-127.0.0.1}"
		;;
	esac

	# $_version is not quoted because it is optional
	# shellcheck disable=SC2086
	if ! ctdb_check_rpc_out=$(rpcinfo -T "$_family" "$_localhost" \
					  "$_progname" $_version 2>&1) ; then
		ctdb_check_rpc_out="$_progname failed RPC check: $ctdb_check_rpc_out"
		echo "$ctdb_check_rpc_out"
		return 1
	fi
}

# Run ctdb_check_rpc for every combination of the given families and
# versions, returning the status of the first check that fails.
nfs_check_rpcinfo ()
{
	_progname="$1"        # passed to rpcinfo (looked up in /etc/rpc)
	_versions="$2"        # optional, space separated, not passed if empty/unset
	_families="${3:-tcp}" # optional, space separated, default is "tcp"

	# Both lists are intentionally unquoted: they are split on whitespace
	for _family in $_families ; do
		if [ -n "$_versions" ] ; then
			for _version in $_versions ; do
				ctdb_check_rpc "$_progname" "$_version" "$_family" || return $?
			done
		else
			ctdb_check_rpc "$_progname" "" "$_family" || return $?
		fi
	done
}

##################################################################
# use statd-callout to update NFS lock info
##################################################################
nfs_update_lock_info ()
{
	if [ -x "$CTDB_BASE/statd-callout" ] ; then
		"$CTDB_BASE/statd-callout" update
	fi
}

######################################################################

nfs_callout_init "$service_state_dir"

is_ctdb_managed_service || exit 0

case "$1" in
startup)
	nfs_callout "$@" || exit $?
	;;

shutdown)
	nfs_callout "$@" || exit $?
	;;

takeip)
	nfs_callout "$@" || exit $?
	ctdb_service_set_reconfigure
	;;

releaseip)
	nfs_callout "$@" || exit $?
	ctdb_service_set_reconfigure
	;;

ipreallocated)
	if ctdb_service_needs_reconfigure ; then
		ctdb_service_reconfigure
	fi
	;;

monitor)
	nfs_callout "monitor-pre" || exit $?

	# Check that directories for shares actually exist
	if [ "$CTDB_NFS_SKIP_SHARE_CHECK" != "yes" ] ; then
		nfs_callout "monitor-list-shares" | ctdb_check_directories || \
			exit $?
	fi

	update_tickles 2049
	nfs_update_lock_info

	nfs_check_services

	nfs_callout "monitor-post" || exit $?
	;;
esac

exit 0