#!/bin/sh
# Copyright (C) 2000-2008, Parallels, Inc. All rights reserved.
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
#
#
# vzmigrate is used for container migration to another node
#
# Usage:
# vzmigrate [-r yes|no] [--ssh=<ssh options>] [--keep-dst] [--online] [-v]
#           destination_address CTID
# Options:
#   -r, --remove-area yes|no
#       Whether to remove container private area on the source
#       hardware node after successful container migration.
#   --ssh=<ssh options>
#       Additional options that will be passed to ssh while establishing
#       connection to destination HN. Please be careful with options
#       passed, DO NOT pass destination hostname.
#   --keep-dst
#       Do not clean synced destination container private area
#       in case of some error. It makes sense to use this option
#       for big container migration to avoid syncing container
#       private area again in case some error (on container stop
#       for example) occurs during first migration attempt.
#   --online
#       Perform online (zero-downtime) migration: during the migration
#       container freezes for a while and after the migration it
#       continues working as though nothing has happened.
#   -v
#       Verbose mode. Causes vzmigrate to print debugging messages about
#       its progress (including some time statistics).
#
# Examples:
#   Online migration of CT #101 to foo.com:
#       vzmigrate --online foo.com 101
#   Migration of CT #102 to foo.com with downtime:
#       vzmigrate foo.com 102
# NOTE:
#   This program uses ssh as a transport layer. You need to put ssh
#   public key to destination node and be able to connect without
#   entering a password.

# Suffixes of per-container action scripts ($confdir/$VEID.<suffix>) that are
# copied to the destination when present.
ACT_SCRIPTS_SFX="start stop mount umount"
# blowfish is a fast block cipher, much faster than 3des
SSH_OPTIONS="-c blowfish"
SCP_OPTIONS=$SSH_OPTIONS
RSYNC_OPTIONS="-aH --delete --numeric-ids"
RSYNC="rsync $RSYNC_OPTIONS"

online=0
verbose=0
remove_area=1
keep_dst=0
debug=0
confdir="/etc/vz/conf"
vzconf="/etc/vz/vz.conf"
tmpdir="/var/tmp"
act_scripts=

# Errors:
MIG_ERR_USAGE=1
MIG_ERR_VPS_IS_STOPPED=2
MIG_ERR_CANT_CONNECT=4
MIG_ERR_COPY=6
MIG_ERR_START_VPS=7
MIG_ERR_STOP_SOURCE=8
MIG_ERR_EXISTS=9
MIG_ERR_NOEXIST=10
MIG_ERR_IP_IN_USE=12
MIG_ERR_QUOTA=13
MIG_ERR_CHECKPOINT=$MIG_ERR_STOP_SOURCE
MIG_ERR_MOUNT_VPS=$MIG_ERR_START_VPS
MIG_ERR_RESTORE_VPS=$MIG_ERR_START_VPS
MIG_ERR_OVZ_NOT_RUNNING=14
MIG_ERR_APPLY_CONFIG=15

# Prints the usage text to stderr and exits with MIG_ERR_USAGE.
usage() {
  cat >&2 <<EOF
vzmigrate is used for container migration to another node
Usage:
vzmigrate [-r yes|no] [--ssh=<ssh options>] [--keep-dst] [--online] [-v]
          destination_address <CTID>
Options:
-r, --remove-area yes|no
        Whether to remove container on source host after successful migration.
--ssh=<ssh options>
        Additional options that will be passed to ssh while establishing
        connection to destination host. Please be careful with options
        passed, DO NOT pass destination hostname.
--keep-dst
        Do not clean synced destination container private area in case of some
        error. It makes sense to use this option on big container migration to
        avoid re-syncing container private area in case some error
        (on container stop for example) occurs during first migration attempt.
--online
        Perform online (zero-downtime) migration: during the migration the
        container freezes for some time and after the migration it keeps
        working as though nothing has happened.
-v
        Verbose mode. Causes vzmigrate to print debugging messages about
        its progress (including some time statistics).
EOF
  exit $MIG_ERR_USAGE
}

# Logs message
# There are 3 types of messages:
# 0 - error messages (print to stderr)
# 1 - normal messages (print to stdout)
# 2 - debug messages (print to stdout if in verbose mode)
# Arguments: $1 - message type, remaining arguments - message text (joined
# with single spaces; backslash escapes such as \t are interpreted).
# printf %b is used instead of the non-portable "echo -e".
log () {
  log_level=$1
  shift
  if [ "$log_level" -eq 0 ]; then
    printf '%b\n' "Error:  $*" >&2
  elif [ "$log_level" -eq 1 ]; then
    printf '%b\n' "$*"
  elif [ "$verbose" -eq 1 ]; then
    printf '%b\n' "  $*"
  fi
}

# Executes command and returns result of execution
# There are 2 types of execution:
# 1 - normal execution (all output will be printed)
# 2 - debug execution (output will be printed if verbose mode is set,
#     in other case stdout and stderr redirected to /dev/null)
# Arguments: $1 - execution type, remaining arguments - command to run.
logexec () {
  logexec_mode=$1
  shift
  if [ "$logexec_mode" -eq 1 ] || [ "$verbose" -eq 1 ]; then
    "$@"
  else
    "$@" >/dev/null 2>&1
  fi
}

# The undo_* functions below form a rollback chain: each one reverts a single
# migration step and then calls the function that reverts the previous step.
# NOTE: $SSH and $SCP hold a command plus its options and are therefore
# expanded unquoted on purpose throughout the script.

# Removes the container config copied to the destination.
undo_conf () {
  $SSH "root@$host" "rm -f $vpsconf"
}

# Removes action scripts copied to the destination (if any were copied).
undo_act_scripts () {
  if [ -n "$act_scripts" ] ; then
    $SSH "root@$host" "rm -f $act_scripts"
  fi
  undo_conf
}

# Removes the destination private area unless --keep-dst was given.
undo_private () {
  if [ "$keep_dst" -eq 0 ]; then
    $SSH "root@$host" "rm -rf $VE_PRIVATE"
  fi
  undo_act_scripts
}

# Removes the destination container root directory.
undo_root () {
  $SSH "root@$host" "rm -rf $VE_ROOT"
  undo_private
}

# Drops the quota initialized on the destination (skipped if quota disabled).
undo_quota_init () {
  [ "${DISK_QUOTA}" = 'no' ] || $SSH "root@$host" "vzquota drop $VEID"
  undo_root
}

# Turns the destination quota off (skipped if quota disabled).
undo_quota_on () {
  [ "${DISK_QUOTA}" = 'no' ] || $SSH "root@$host" "vzquota off $VEID"
  undo_quota_init
}

undo_sync () {
  # Root will be destroyed in undo_root
  undo_quota_on
}

# Resumes the suspended source container.
undo_suspend () {
  logexec 2 vzctl chkpnt "$VEID" --resume
  undo_sync
}

# Removes the local dump file (kept when debug is enabled).
undo_dump () {
  if [ "$debug" -eq 0 ]; then
    rm -f "$VE_DUMPFILE"
  fi
  undo_suspend
}

# Removes the dump file copied to the destination, then the local one
# (via undo_dump) so that no dump file is left behind on either node.
undo_copy_dump () {
  $SSH "root@$host" "rm -f $VE_DUMPFILE"
  undo_dump
}

# Brings the source container back to the state recorded by get_status.
undo_stop () {
  if [ "$state" = "running" ]; then
    vzctl start "$VEID"
  elif [ "$mounted" = "mounted" ]; then
    vzctl mount "$VEID"
  fi
  undo_sync
}

# Reverts the source-side stop/suspend step depending on migration mode.
undo_source_stage() {
  if [ "$online" -eq 1 ]; then
    undo_copy_dump
  else
    undo_stop
  fi
}

# Removes the local 2nd level quota dump.
undo_quota_dump () {
  rm -f "$VE_QUOTADUMP"
  undo_source_stage
}

# Removes the 2nd level quota dump copied to the destination.
undo_copy_quota () {
  $SSH "root@$host" "rm -f $VE_QUOTADUMP"
  undo_quota_dump
}

# Kills the container restored (undumped) on the destination.
undo_undump () {
  logexec 2 $SSH "root@$host" vzctl restore "$VEID" --kill
  undo_copy_quota
}

# Stores words 3, 4 and 5 of the "vzctl status" output in the globals
# exist, mounted and state.
get_status() {
  exist=$3
  mounted=$4
  state=$5
}

# Prints the difference $2 - $1 (timestamps as produced by date +%s.%N).
get_time () {
  awk -v t2="$2" -v t1="$1" 'BEGIN{print t2-t1}'
}

if [ $# -lt 2 ]; then
  usage
fi

while [ -n "$1" ]; do
  log 1 "OPT:$1"
  case "$1" in
  --online)
    online=1
    ;;
  -v)
    verbose=1
    ;;
  --remove-area|-r)
    shift
    case "$1" in
    yes) remove_area=1 ;;
    no)  remove_area=0 ;;
    *)   usage ;;
    esac
    ;;
  --keep-dst)
    keep_dst=1
    ;;
  --ssh=*)
    SSH_OPTIONS="$SSH_OPTIONS ${1#--ssh=}"
    # scp spells the port option -P where ssh uses -p.  Only rewrite a -p
    # that starts an option, not a "-p" embedded in e.g. a key file path.
    SCP_OPTIONS=$(printf '%s\n' "$SSH_OPTIONS" | sed 's/ -p/ -P/')
    ;;
  *)
    break
    ;;
  esac
  shift
done

SSH="ssh $SSH_OPTIONS"
SCP="scp $SCP_OPTIONS"

# Quoted: some shells field-split unquoted expansions in "export VAR=$val".
export RSYNC_RSH="$SSH"

# Exactly two non-empty positional arguments must remain.  Checked before
# shifting because shifting past the end is a fatal error in some shells.
if [ $# -ne 2 ] || [ -z "$1" ] || [ -z "$2" ]; then
  usage
fi
host=$1
VEID=$2
shift 2

vpsconf="$confdir/$VEID.conf"

if [ ! -r "$vzconf" ] || [ ! -r "$vpsconf" ]; then
  log 0 "Can't read global config or CT #$VEID config file"
  exit $MIG_ERR_NOEXIST
fi

# Unquoted on purpose: the status line is split into words for get_status.
get_status $(vzctl status "$VEID")
if [ "$exist" = "deleted" ]; then
  log 0 "CT #$VEID doesn't exist"
  exit $MIG_ERR_NOEXIST
fi

if [ "$online" -eq 1 ]; then
  log 1 "Starting online migration of CT $VEID to $host"
else
  log 1 "Starting migration of CT $VEID to $host"
fi

# Try to connect to destination
if ! logexec 2 $SSH -o BatchMode=yes "root@$host" /bin/true; then
  log 0 "Can't connect to destination address using public key"
  log 0 "Please put your public key to destination node"
  exit $MIG_ERR_CANT_CONNECT
fi

# Check if OpenVZ is running
if ! logexec 2 $SSH -o BatchMode=yes "root@$host" /etc/init.d/vz status ; then
  log 0 "OpenVZ is not running on the target machine"
  log 0 "Can't continue migration"
  exit $MIG_ERR_OVZ_NOT_RUNNING
fi

# Check if CPT modules are loaded for online migration
if [ "$online" -eq 1 ]; then
  if [ ! -f /proc/cpt ]; then
    log 0 "vzcpt module is not loaded on the source node"
    log 0 "Can't continue online migration"
    exit $MIG_ERR_OVZ_NOT_RUNNING
  fi
  if ! logexec 2 $SSH -o BatchMode=yes "root@$host" "test -f /proc/rst"; then
    log 0 "vzrst module is not loaded on the destination node"
    log 0 "Can't continue online migration"
    exit $MIG_ERR_OVZ_NOT_RUNNING
  fi
fi

dst_exist=$($SSH "root@$host" "vzctl status $VEID" | awk '{print $3}')
if [ "$dst_exist" = "exist" ]; then
  log 0 "CT #$VEID already exists on destination node"
  exit $MIG_ERR_EXISTS
fi

if [ "$online" -eq 1 ] && [ "$state" != "running" ]; then
  log 0 "Can't perform online migration of a stopped container"
  exit $MIG_ERR_VPS_IS_STOPPED
fi

log 2 "Loading $vzconf and $vpsconf files"

. "$vzconf"
. "$vpsconf"
VE_DUMPFILE="$tmpdir/dump.$VEID"
VE_QUOTADUMP="$tmpdir/quotadump.$VEID"

log 2 "Check IPs on destination node: $IP_ADDRESS"
for IP in $IP_ADDRESS; do
  # Fixed-string match of " <ip> " in the destination's veip table.
  if $SSH "root@$host" "grep -qF \" $IP \" /proc/vz/veip"; then
    log 0 "IP address $IP already in use on destination node"
    exit $MIG_ERR_IP_IN_USE
  fi
done

log 1 "Preparing remote node"

log 2 "Copying config file"
if ! logexec 2 $SCP "$vpsconf" "root@$host:$vpsconf" ; then
  log 0 "Failed to copy config file"
  exit $MIG_ERR_COPY
fi

logexec 2 $SSH "root@$host" vzctl set "$VEID" --applyconfig_map name --save
# Save the status right away: every later command (including "[") resets $?.
applyconfig_rc=$?
# vzctl return code 20 or 21 in case of unrecognized option
if [ "$applyconfig_rc" -ne 0 ] && [ "$applyconfig_rc" -ne 20 ] &&
    [ "$applyconfig_rc" -ne 21 ]; then
  log 0 "Failed to apply config on destination node"
  undo_conf
  exit $MIG_ERR_APPLY_CONFIG
fi

for sfx in $ACT_SCRIPTS_SFX; do
  file="$confdir/$VEID.$sfx"
  if [ -f "$file" ]; then
    act_scripts="$act_scripts $file"
  fi
done

if [ -n "$act_scripts" ]; then
  log 2 "Copying action scripts"
  # $act_scripts is a space-separated list and is split on purpose.
  if ! logexec 2 $SCP $act_scripts "root@$host:$confdir" ; then
    log 0 "Failed to copy action scripts"
    undo_conf
    exit $MIG_ERR_COPY
  fi
fi

log 2 "Creating remote container root dir"
if ! $SSH "root@$host" "mkdir -p $VE_ROOT"; then
  log 0 "Failed to make container root directory"
  undo_act_scripts
  exit $MIG_ERR_COPY
fi

log 2 "Creating remote container private dir"
if ! $SSH "root@$host" "mkdir -p $VE_PRIVATE"; then
  log 0 "Failed to make container private area directory"
  # The root dir was already created above, so roll it back as well.
  undo_root
  exit $MIG_ERR_COPY
fi

if [ "${DISK_QUOTA}" != "no" ]; then
  log 1 "Initializing remote quota"

  log 2 "Quota init"
  if ! $SSH "root@$host" "vzctl quotainit $VEID"; then
    log 0 "Failed to initialize quota"
    undo_root
    exit $MIG_ERR_QUOTA
  fi

  log 2 "Turning remote quota on"
  if ! $SSH "root@$host" "vzctl quotaon $VEID"; then
    log 0 "Failed to turn quota on"
    undo_quota_init
    exit $MIG_ERR_QUOTA
  fi
else
  log 2 "VZ disk quota disabled -- skipping quota migration"
fi

log 1 "Syncing private"
# The progress filter is the last stage of the pipeline, so the pipeline's
# own status would be awk's, not rsync's.  rsync's exit status is therefore
# written to fd 3 (captured by the command substitution) while the filtered
# progress output goes to the real stdout via fd 4.
exec 4>&1
rsync_rc=$(
  {
    {
      $RSYNC --progress "$VE_PRIVATE" "root@$host:${VE_PRIVATE%/*}"
      echo $? >&3
    } | grep "% of" | awk -v ORS="\r" '{print $10}' >&4
  } 3>&1
)
exec 4>&-

# rsync exit code 24 (source files vanished during transfer) is tolerated
# while the container is running: the second pass below re-syncs the area
# after the container has been stopped or suspended and is checked strictly.
sync_failed=1
if [ "$rsync_rc" = 0 ]; then
  sync_failed=0
elif [ "$rsync_rc" = 24 ] && [ "$state" = "running" ]; then
  sync_failed=0
fi
if [ "$sync_failed" -eq 1 ]; then
  log 0 "Failed to sync container private areas"
  undo_quota_on
  exit $MIG_ERR_COPY
fi

if [ "$online" -eq 1 ]; then
  log 1 "Live migrating container..."

  log 2 "Suspending container"
  time_suspend=$(date +%s.%N)
  if ! logexec 2 vzctl chkpnt "$VEID" --suspend ; then
    log 0 "Failed to suspend container"
    undo_sync
    exit $MIG_ERR_CHECKPOINT
  fi

  log 2 "Dumping container"
  if ! logexec 2 vzctl chkpnt "$VEID" --dump --dumpfile "$VE_DUMPFILE" ; then
    log 0 "Failed to dump container"
    undo_suspend
    exit $MIG_ERR_CHECKPOINT
  fi

  log 2 "Copying dumpfile"
  time_copy_dump=$(date +%s.%N)
  if ! logexec 2 $SCP "$VE_DUMPFILE" "root@$host:$VE_DUMPFILE" ; then
    log 0 "Failed to copy dump"
    undo_dump
    exit $MIG_ERR_COPY
  fi
else
  if [ "$state" = "running" ]; then
    log 1 "Stopping container"
    if ! logexec 2 vzctl stop "$VEID" ; then
      log 0 "Failed to stop container"
      undo_sync
      exit $MIG_ERR_STOP_SOURCE
    fi
  elif [ "$mounted" = "mounted" ]; then
    log 1 "Unmounting container"
    if ! logexec 2 vzctl umount "$VEID" ; then
      log 0 "Failed to umount container"
      undo_sync
      exit $MIG_ERR_STOP_SOURCE
    fi
  fi
fi

if [ "$state" = "running" ]; then
  log 2 "Syncing private (2nd pass)"
  time_rsync2=$(date +%s.%N)
  if ! $RSYNC "$VE_PRIVATE" "root@$host:${VE_PRIVATE%/*}"; then
    log 0 "Failed to sync container private areas"
    undo_source_stage
    exit $MIG_ERR_COPY
  fi
fi

# Taken in both branches so the time statistics below stay meaningful even
# when disk quota is disabled.
time_quota=$(date +%s.%N)
if [ "${DISK_QUOTA}" != "no" ]; then
  log 1 "Syncing 2nd level quota"

  log 2 "Dumping 2nd level quota"
  if ! vzdqdump "$VEID" -U -G -T > "$VE_QUOTADUMP"; then
    log 0 "Failed to dump 2nd level quota"
    undo_quota_dump
    exit $MIG_ERR_QUOTA
  fi

  log 2 "Copying 2nd level quota"
  if ! logexec 2 $SCP "$VE_QUOTADUMP" "root@$host:$VE_QUOTADUMP" ; then
    log 0 "Failed to copy 2nd level quota dump"
    undo_quota_dump
    exit $MIG_ERR_COPY
  fi

  log 2 "Load 2nd level quota"
  if ! $SSH "root@$host" "(vzdqload $VEID -U -G -T < $VE_QUOTADUMP && vzquota reload2 $VEID)"; then
    log 0 "Failed to load 2nd level quota"
    undo_copy_quota
    exit $MIG_ERR_QUOTA
  fi
else
  log 2 "VZ disk quota disabled -- skipping quota migration"
fi

if [ "$online" -eq 1 ]; then
  log 2 "Undumping container"
  time_undump=$(date +%s.%N)
  if ! logexec 2 $SSH "root@$host" vzctl restore "$VEID" --undump \
      --dumpfile "$VE_DUMPFILE" --skip_arpdetect ; then
    log 0 "Failed to undump container"
    undo_copy_quota
    exit $MIG_ERR_RESTORE_VPS
  fi

  log 2 "Resuming container"
  if ! logexec 2 $SSH "root@$host" vzctl restore "$VEID" --resume ; then
    log 0 "Failed to resume container"
    undo_undump
    exit $MIG_ERR_RESTORE_VPS
  fi
  time_finish=$(date +%s.%N)
  log 2 "Times:"
  log 2 "\tSuspend + Dump:\t" "$(get_time "$time_suspend" "$time_copy_dump")"
  log 2 "\tCopy dump file:\t" "$(get_time "$time_copy_dump" "$time_rsync2")"
  log 2 "\tSecond rsync:\t" "$(get_time "$time_rsync2" "$time_quota")"
  log 2 "\t2nd level quota:\t" "$(get_time "$time_quota" "$time_undump")"
  log 2 "\tUndump + Resume:\t" "$(get_time "$time_undump" "$time_finish")"
  log 2 "Total time:" "$(get_time "$time_suspend" "$time_finish")"

  log 1 "Cleanup"

  log 2 "Killing container"
  logexec 2 vzctl chkpnt "$VEID" --kill
  logexec 2 vzctl umount "$VEID"

  log 2 "Removing dumpfiles"
  rm -f "$VE_DUMPFILE"
  $SSH "root@$host" "rm -f $VE_DUMPFILE"
else
  if [ "$state" = "running" ]; then
    log 1 "Starting container"
    if ! logexec 2 $SSH "root@$host" vzctl start "$VEID" ; then
      log 0 "Failed to start container"
      undo_copy_quota
      exit $MIG_ERR_START_VPS
    fi
  elif [ "$mounted" = "mounted" ]; then
    log 1 "Mounting container"
    if ! logexec 2 $SSH "root@$host" vzctl mount "$VEID" ; then
      log 0 "Failed to mount container"
      undo_copy_quota
      exit $MIG_ERR_MOUNT_VPS
    fi
  elif [ "${DISK_QUOTA}" != "no" ]; then
    # Quota was only turned on above when disk quota is enabled, so only
    # then is there something to turn off (same guard as undo_quota_on).
    log 1 "Turning quota off"
    if ! logexec 2 $SSH "root@$host" vzquota off "$VEID" ; then
      log 0 "failed to turn quota off"
      undo_copy_quota
      exit $MIG_ERR_QUOTA
    fi
  fi

  log 1 "Cleanup"
fi

if [ "$remove_area" -eq 1 ]; then
  log 2 "Destroying container"
  logexec 2 vzctl destroy "$VEID"
else
  # Move config as veid.migrated to allow backward migration
  mv -f "$vpsconf" "$vpsconf.migrated"
fi