#!/bin/bash

# Authors:
#  Andrew Beekhof <abeekhof@redhat.com>
#  Fabio M. Di Nitto <fdinitto@redhat.com>
#
# License: Revised BSD

# chkconfig: - 99 01
# description: Pacemaker Cluster Manager
# processname: pacemakerd
#
### BEGIN INIT INFO
# Provides:     pacemaker
# Required-Start:   $network $remote_fs corosync
# Should-Start:     $syslog
# Required-Stop:    $network $remote_fs corosync
# Default-Start:
# Default-Stop:
# Short-Description:    Starts and stops Pacemaker Cluster Manager.
# Description:      Starts and stops Pacemaker Cluster Manager.
### END INIT INFO

# Human-readable service name shown in console and syslog messages.
desc='Pacemaker Cluster Manager'
# Name of the main daemon; also used as the basename of its pid file.
prog='pacemakerd'
# Space-separated list of the processes that status and stop look for.
daemons="crmd stonithd attrd cib lrmd pacemakerd pengine"

# Use a fixed search path instead of whatever the caller's environment had.
PATH='/sbin:/bin:/usr/sbin:/usr/bin:@sbindir@'

# Report the outcome of the command that ran immediately before this call:
# invokes success when that command exited 0 and failure otherwise.
# Must be called directly after the command of interest, since it reads $?.
checkrc() {
    case $? in
    0) success ;;
    *) failure ;;
    esac
}

# Print the "OK" result marker followed by a carriage return (no newline),
# leaving the cursor at the start of the current console line.
success()
{
    printf '[  OK  ]\r'
}

# Print the "FAILED" result marker followed by a carriage return (no
# newline), leaving the cursor at the start of the current console line.
failure()
{
    printf '[FAILED]\r'
}

# Send a message to syslog, tagged "pacemaker", at priority daemon.notice.
# All arguments are joined with spaces into a single message.
log()
{
    local message="$*"
    logger -t pacemaker -p daemon.notice "$message"
}

# Record a message in syslog (via log) and also print it on stdout without
# a trailing newline, so a result marker can follow on the same line.
notify()
{
    local message="$*"
    log "$message"
    echo -n "$message"
}

# Report whether any of the named processes is running.
#
# Arguments: process names to look for (looked up with pidof).
# Globals:   prog (read) - name of the main daemon / basename of its pid file.
# Outputs:   one summary line on stdout, plus a second line when some
#            component is running but the main daemon is not.
# Returns:   0 when at least one of the processes is running;
#            1 when none is running but /var/run/$prog.pid exists;
#            3 when none is running and there is no pid file
#            (these match the LSB init-script status codes).
status()
{
    local running=""
    local rtrn=0
    local process pid

    # $* is deliberately left unquoted so that a single space-separated
    # list is split into individual process names as well.
    for process in $*; do
        if pid=$(pidof "$process" 2>/dev/null); then
            running="$process($pid) $running"
        fi
    done

    if [ -z "$running" ]; then
        echo "Pacemaker components are stopped"
        if [ -f "/var/run/$prog.pid" ]; then
            rtrn=1
        else
            rtrn=3
        fi
    else
        echo "Pacemaker components are running (${running% })"
        # Only the exit status matters here; discard pidof's output so the
        # PID is not printed as a stray line in the status report.
        if ! pidof "$prog" >/dev/null 2>&1; then
            echo "But main process (pacemakerd) is dead"
        fi
    fi
    return $rtrn
}

# Load the init helper library and the local configuration, but only on
# systems that have an /etc/default directory.
if [ -d /etc/default ]; then
    if [ -f /etc/init.d/functions ]; then
        . /etc/init.d/functions
    fi
    # Auto-export everything the configuration files assign, so that
    # child processes started by this script inherit those settings.
    set -a
    if [ -f /etc/default/pacemaker ]; then
        . /etc/default/pacemaker
    fi
    if [ -f /etc/default/sbd ]; then
        . /etc/default/sbd
    fi
    set +a
fi

# Pick the lock directory: prefer /var/lock/subsys, then /var/lock, and
# fall back to the current directory when neither exists.
LOCK_DIR="/var/lock/subsys"
[ -d "$LOCK_DIR" ] || LOCK_DIR="/var/lock"
[ -d "$LOCK_DIR" ] || LOCK_DIR="."
# A LOCK_FILE supplied by the environment or configuration wins.
: "${LOCK_FILE:=$LOCK_DIR/pacemaker}"

# Unless a stack was chosen explicitly, the presence of cluster.conf is
# taken to mean that cman is in use.
if [ -z "$PCMK_STACK" ] && [ -f "/etc/cluster/cluster.conf" ]; then
    PCMK_STACK=cman
fi

# Manage the sbd helper service only when the sbd configuration names a
# real watchdog character device (not /dev/null) ...
if [ -n "$SBD_WATCHDOG_DEV" ] \
    && [ "/dev/null" != "$SBD_WATCHDOG_DEV" ] \
    && [ -c "$SBD_WATCHDOG_DEV" ]; then
    # ... and chkconfig reports sbd_helper as enabled. When chkconfig is
    # unavailable the pipeline fails and sbd is left alone.
    if chkconfig --list sbd_helper 2>/dev/null | grep -q ":on"; then
        SBD_SERVICE=sbd_helper
    fi
fi

# Start the main daemon unless it is already running.
#
# Globals: desc, prog, LOCK_FILE (read); rtrn (set to 1 when the daemon is
#          not running after the start attempt).
# Outputs: progress message and OK/FAILED marker on stdout.
start()
{
    local ok=yes

    notify "Starting $desc"

    # /var/run may live on a tmpfs that is empty after every boot, so the
    # init script is expected to create whatever it needs there itself.
    mkdir -p "/var/run"

    if ! status $prog > /dev/null 2>&1; then
        $prog > /dev/null 2>&1 &

        # Give the daemon time to connect to corosync - or to fail trying
        sleep 5

        if status $prog > /dev/null 2>&1; then
            touch "$LOCK_FILE"
            pidof $prog > "/var/run/$prog.pid"
        else
            ok=no
        fi
    fi

    if [ "$ok" = yes ]; then
        success
    else
        failure
        rtrn=1
    fi
    echo
}

# Bring up cman and its companion services before pacemaker is started.
# Exits the whole script with status 1 when cman cannot be started.
cman_pre_start()
{
    # cman itself: start it when its status check fails
    if ! service cman status >/dev/null 2>&1; then
        if ! service cman start; then
            notify "Aborting startup of $desc"
            echo
            exit 1
        fi
    fi

    # Companion services: only those chkconfig reports as enabled, and
    # only when they are not already running
    for cservice in cmirrord clvmd gfs2 $SBD_SERVICE; do
        if chkconfig --list $cservice 2>/dev/null | grep -q ":on"; then
            if ! service $cservice status >/dev/null 2>&1; then
                log "Attempting to start $cservice"
                service $cservice start
            fi
        fi
    done
}

# Prepare a cman-based node for pacemaker shutdown:
#   1. put the local node into standby and wait until no managed resource
#      is reported on it any more,
#   2. if DLM lockspaces are still present, stop gfs2/clvmd/cmirrord,
#   3. leave the fence domain and kill fenced.
# Does nothing when "service cman status" reports cman as not running.
# Note: step 1 waits without a timeout.
cman_pre_stop()
{
    local has_lvm cname active lockspace cservice fenced

    # if cman is not running there is nothing we need to do here
    if ! service cman status >/dev/null 2>&1; then
        return 0
    fi

    has_lvm=$(crm_resource -c | grep Resource: | grep LVM)

    # migrate resources to another node or shut them down
    cname=$(crm_node --name)
    crm_attribute -N "$cname" -n standby -v true -l reboot
    notify "Waiting for shutdown of managed resources"

    while true; do
        # 0x0000000000000002 means managed.
        # The flag pattern is quoted so the shell cannot treat it as a
        # glob and rewrite it when a matching file exists in the current
        # directory. The node name is compared as a fixed, whole-line
        # string so dots in host names are not regex wildcards.
        # NOTE(review): [2367] does not cover a/b/e/f, i.e. flag values
        # whose last digit also has the 0x8 bit set - confirm that cannot
        # occur together with "managed".
        active=$(crm_resource -c | grep Resource: \
            | grep '0x...............[2367]' \
            | awk '{print $9}' \
            | grep -cFx -- "$cname")
        if [ "$active" = 0 ]; then
            break
        fi
        sleep 2
        echo -n "."
    done
    success
    echo

    if [ -d /sys/kernel/dlm/ ]; then
        lockspace="$(ls -1 /sys/kernel/dlm/)"
        if [ -n "$lockspace" ]; then
            notify "DLM lockspace still in use"
            echo ""

            for cservice in gfs2 clvmd cmirrord; do
                if service $cservice status >/dev/null 2>&1; then
                    if [ -n "$has_lvm" ] && [ "$cservice" = "clvmd" ]; then
                        # allow HA-LVM to take a lock on vg/lv before clvmd can exit
                        notify "Waiting for LVM services to start somewhere else"
                        sleep 15
                        success
                    fi
                    log "Attempting to shutdown $cservice"
                    service $cservice stop
                fi
            done
        fi
    fi

    notify "Leaving fence domain"
    fence_tool leave -w 10
    checkrc

    fenced=$(pidof fenced)
    notify "Stopping fenced $fenced"
    # $fenced is left unquoted on purpose: pidof may report several PIDs
    kill -KILL $fenced > /dev/null 2>&1
    checkrc
}

# Poll a command until it fails or an attempt limit is reached.
#
# Arguments:
#   $1 - command line to run (word-split on purpose, output discarded)
#   $2 - delay between attempts, passed to sleep
#   $3 - number of attempts after which polling gives up
# Outputs: a blank line, one "." per completed wait, then a newline.
# Returns: 0 as soon as the command fails (also when that happens on the
#          final attempt), 1 when the attempt limit is hit first.
while_false_or_timeout () {
    local attempts=0
    local verdict=""

    echo ""
    until [ -n "$verdict" ]; do
        attempts=$((attempts + 1))
        # Mark the timeout first; a failing command below still wins.
        if [ $attempts -eq $3 ]; then
            verdict=1
        fi
        if ! $1 >/dev/null; then
            verdict=0
        fi
        if [ -z "$verdict" ]; then
            sleep $2
            printf '.'
        fi
    done
    echo ""
    return $verdict
}


# Wait (up to 300 polls, one second apart) for every process in $daemons
# to disappear, then report the result: OK marker when they are gone,
# timeout message plus FAILED marker when the wait ran out.
_pcmk_wait_stopped()
{
    if while_false_or_timeout "status $daemons" 1 300; then
        success
    else
        notify "Timeout of shuting down $desc"
        failure
    fi
}

# Stop Pacemaker.
#
# Escalates in three steps: SIGTERM to the main daemon, SIGTERM to every
# remaining component, and finally SIGKILL to whatever is still alive.
# The lock file and pid file are removed before the final SIGKILL step.
#
# Globals: desc, prog, daemons, LOCK_FILE (read).
# Outputs: progress messages and OK/FAILED markers on stdout.
stop()
{
    if ! status $daemons >/dev/null; then
        echo  "$desc is already stopped"
        return
    fi

    # Step 1: ask the main daemon to shut the stack down cleanly
    if status $prog >/dev/null; then
        notify "Signaling $desc to terminate"
        kill -TERM $(pidof $prog) > /dev/null
        _pcmk_wait_stopped
    fi

    # Step 2: signal any component that outlived the main daemon
    if status $daemons >/dev/null; then
        kill -TERM $(pidof $daemons) > /dev/null
        _pcmk_wait_stopped
    fi

    rm -f "$LOCK_FILE"
    rm -f "/var/run/$prog.pid"

    # Step 3: force-kill leftovers. $daemons must stay unquoted so that
    # killall receives one argument per process name.
    if status $daemons >/dev/null; then
        killall -q -9 $daemons
    fi
    success
    echo
}

# Exit status of the script. start() sets it to 1 on failure; the status
# and usage arms below set it explicitly.
rtrn=0

case "$1" in
    start)
        # Mirror of the cman handling done for stop
        if [ "$PCMK_STACK" = cman ]; then
            cman_pre_start
        fi
        start
        ;;
    restart | reload | force-reload)
        stop
        start
        ;;
    condrestart | try-restart)
        # Restart only when the main daemon is currently running
        if status $prog > /dev/null 2>&1; then
            stop
            start
        fi
        ;;
    status)
        status $daemons
        rtrn=$?
        ;;
    stop)
        #
        # stonithd needs to be around until fenced is stopped, and
        # fenced can't be stopped while any resource using dlm is
        # still active.
        #
        # So:
        # 1. put the node into standby
        # 2. wait for all resources to be stopped
        # 3. stop fenced and anything that needs it (borrowed from
        #    the cman script)
        # 4. stop pacemaker
        # 5. stop the rest of cman (so it doesn't end up half up/down)
        #
        if [ "$PCMK_STACK" = cman ]; then
            cman_pre_stop
        fi
        stop
        if [ -n "$SBD_SERVICE" ]; then
            service $SBD_SERVICE stop
        fi

        # Stop cman if needed, unless --skip-cman is specified (which
        # allows higher-level tooling to stop pacemaker on all nodes,
        # then stop cman on all nodes, to maintain quorum for DLM-based
        # resources while pacemaker shuts down).
        if [ "$PCMK_STACK" = cman ] && [ "$2" != "--skip-cman" ]; then
            service cman stop
        fi
        ;;
    *)
        echo "usage: $0 {start|stop|restart|reload|force-reload|condrestart|try-restart|status}"
        rtrn=2
        ;;
esac

exit $rtrn
