#!/bin/bash
#Roy Cohen          :roy@proteon.com
#Proteon B.V.       :Zuid Hollandlaan 7, 2596 AL Den Haag
#objective          :OpenShift patching tool using yum (RHEL and CentOS)
#First line of code :04/09/2019
#last update        :06/09/2019
#version            :0.1
#inspired by /home/freark/bin/batch_upgrade.sh
#Check if root is running the script (yum and remote oc operations require it)
if [[ $EUID -ne 0 ]]; then
    echo "This script must be run as root" >&2
    exit 1
fi
# Parameters
# Helper script that schedules Nagios downtime for a host being patched.
readonly NAGIOS_SCRIPT='/opt/proteon-scripts/minions/nagios/generic/nagios_add_downtime_host.sh'
# Per-run log directories are created under this base path.
readonly BASE_LOG_DIR=/var/log/batch_upgrade
# Timestamp used to name this run's log directory, e.g. 20190906.1432
TIMESTAMP=$(date +%Y%m%d.%H%M)
readonly TIMESTAMP
#set the master host
read -r -p "Enter OpenShift master: " OS_MASTER

#Set node variables: hostnames of nodes whose STATUS is exactly "Ready", and a
#count of nodes that are already NotReady or cordoned (SchedulingDisabled).
# NOTE: plain `grep "Ready"` would also match "NotReady" lines; -w requires a
# whole-word match so NotReady nodes are excluded from the Ready list.
OS_NODES_READY=$(ssh -n "$OS_MASTER" "oc get nodes" | grep -w "Ready" | cut -d " " -f1)
COUNT_OF_OS_NODES_DRAINED=$(ssh -n "$OS_MASTER" "oc get nodes" | grep -cE "NotReady|SchedulingDisabled")

#TODO what if there is more than one node already drained?
if [[ "$COUNT_OF_OS_NODES_DRAINED" -gt 0 ]]; then
    echo -e "\n NOTE: There are drained nodes on the cluster\n
Please check the master node for possible issues"
    exit 1
fi
###############################OPENSHIFT PATCHING BEGINS HERE###############################

#list nodes on the OpenShift Master, one per line, then prompt for the node to update
function list_nodes()
{
    printf '%s\n' "${OS_NODES_READY[@]}"
    set_update_host
}
#set the hostname for the update (stored in the global HOST_TO_UPDATE)
function set_update_host()
{
    echo "Set the OpenShift node that needs to be updated or Press ^c to bailout/finish"
    # -r keeps backslashes in the hostname literal
    read -r -p "Enter OpenShift node: " HOST_TO_UPDATE
}
#Break on an error: if the given exit status is non-zero, warn the operator
#and wait for confirmation before continuing.
#  $1 - exit status of the command to check (defaults to 0 if omitted)
function pause_on_error {
    if [[ "${1:-0}" -ne 0 ]]; then
        echo -e "\e[31mThe last command resulted in an error.\e[0m Press ^c to bailout or enter to continue"
        read -r
    fi
}
#Add nagios downtime for the node about to be patched.
function nagios_downtime()
{
    echo "Adding downtime for host $HOST_TO_UPDATE..."
    "$NAGIOS_SCRIPT" --host="$HOST_TO_UPDATE" --comment='Batch Updating Host'
    # Check the Nagios script's status directly; the original checked the
    # status of an intervening echo, which is always 0.
    pause_on_error $?
}
#just a counter, one minute
|
|
function time_counter()
|
|
{
|
|
count=0
|
|
total=120
|
|
|
|
while [ $count -lt $total ]; do
|
|
sleep 0.5 # this is work
|
|
count=$(( $count + 1 ))
|
|
pd=$(( $count * 60 / $total ))
|
|
printf "\r${pd}s" $(( $count * 60 / $total ))
|
|
done
|
|
}
|
|
|
|
#Drain the node on the master, then wait a minute for pods to migrate.
function drain()
{
    echo "Draining $HOST_TO_UPDATE"
    ssh -n "$OS_MASTER" "oc adm drain --ignore-daemonsets $HOST_TO_UPDATE"
    # Capture the drain status now; the original passed $? of time_counter
    # to pause_on_error, silently discarding any drain failure.
    local rc=$?
    # quoted heredoc: no expansion, backslashes in the cow stay literal
    cat <<'EOF'
 ________________________________________________________________________
< This will take a minute, Just making sure that the node is fully drained >
 ------------------------------------------------------------------------
        \   ^__^
         \  (oo)\_______
            (__)\       )\/
                ||----w |
                ||     ||

EOF
    time_counter
    pause_on_error $rc
}
#Update node: run a security-only yum update on the node, with docker/atomic
#packages excluded, logging the yum output locally.
function update()
{
    mkdir -p "${BASE_LOG_DIR}"
    LOG_DIR=$(mktemp -d "${BASE_LOG_DIR}/${TIMESTAMP}.XXXX")

    echo -e "\nI'm now going to update $HOST_TO_UPDATE."
    echo "You can follow/check logs in:"
    echo "${LOG_DIR}"
    echo "Use for instance 'tail -f ${LOG_DIR}/$HOST_TO_UPDATE' in a different terminal after continuing here."
    echo "If any fail you MUST check and update these before proceeding!"
    echo -e "\e[33mPress ^c to bailout or enter to continue\e[0m"
    read -r
    echo "This may take some time..."
    echo "Please check the package list, if docker/openshift/atomic are included then STOP the update by replying N to yum or Press ^c "

    #exclude docker/atomic from updates by rewriting yum.conf on the node.
    # Single quotes keep the whole command for the remote shell: the original
    # nested double quotes ended the outer string, so "docker* atomic*" was
    # glob-expanded locally and the remote command line was corrupted.
    ssh -n "$HOST_TO_UPDATE" 'grep -v exclude /etc/yum.conf >/tmp/yum.conf && echo "exclude= docker* atomic* " >> /tmp/yum.conf && cat /tmp/yum.conf >/etc/yum.conf'

    #update command; check yum's exit code via PIPESTATUS — plain $? would
    #report the status of tee, which virtually always succeeds.
    ssh -t "$HOST_TO_UPDATE" "sleep 10 && yum update --security" | tee "${LOG_DIR}/$HOST_TO_UPDATE"
    pause_on_error "${PIPESTATUS[0]}"
}
#reboot node: confirm with the operator, reboot over ssh, then wait for the
#host to go down and come back up before returning.
function reboot_server()
{
    echo "Restarting server $HOST_TO_UPDATE"

    # Loop until the operator explicitly continues ('c') or skips ('s').
    # (The original's compound -n/!= condition was effectively always true;
    # exits happened only via the return/break below, as they do here.)
    while true; do
        echo -e "\e[33mPress 'c' continue, 's' to skip, or ^c to bail out...\e[0m"
        read -r ANSWER
        if [[ "${ANSWER}" == 's' ]]; then
            echo "Skipping $HOST_TO_UPDATE"
            return 0
        fi
        if [[ "${ANSWER}" == 'c' ]]; then
            echo "Proceeding to reboot $HOST_TO_UPDATE"
            break
        fi
    done

    timeout 4 ssh -o ConnectTimeout=3 "$HOST_TO_UPDATE" "reboot"
    # no pause on reboot, because often you get kicked out of the server too
    # quickly causing a non-zero exitcode

    # wait until the server is down
    while ssh -o ConnectTimeout=2 "$HOST_TO_UPDATE" true; do
        echo "Waiting for $HOST_TO_UPDATE to be down..."
        sleep 0.5
    done

    echo "Server $HOST_TO_UPDATE appears to be down..."

    # wait until the server is reachable again
    until ssh -o ConnectTimeout=2 "$HOST_TO_UPDATE" true; do
        echo "Waiting for $HOST_TO_UPDATE to be back up..."
        sleep 5
    done
}
#uncordon node in the OpenShift master, then list nodes for the next round
function uncordon()
{
    echo "------------------------------------"
    echo "Reactivating $HOST_TO_UPDATE in the OpenShift master $OS_MASTER"
    ssh -n "$OS_MASTER" "oc adm uncordon $HOST_TO_UPDATE"
    # Check the uncordon status itself; the original passed list_nodes'
    # status to pause_on_error, so a failed uncordon went unnoticed.
    local rc=$?
    pause_on_error $rc
    list_nodes
}
############################### -MAIN RUN- ###############################
# One full patch cycle for a single node: pick it, silence Nagios, drain,
# update, reboot, and bring it back into scheduling.

list_nodes
nagios_downtime
drain
update
reboot_server
uncordon