Bash_Scrips/scripts/tools/OS_patching_tool.sh
#!/bin/bash
#Roy Cohen :roy@proteon.com
#Proteon B.V. :Zuid Hollandlaan 7, 2596 AL Den Haag
#objective :OpenShift patching tool using yum (RHEL and CentOS)
#First line of code :04/09/2019
#last update :06/09/2019
#version :0.1
#inspired by /home/freark/bin/batch_upgrade.sh
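#Workflow: prompt for the OpenShift master, list the Ready nodes, pick one node,
#add Nagios downtime for it, drain it, run 'yum update --security' on it
#(docker*/atomic* excluded via yum.conf), reboot it, wait for SSH to return,
#then uncordon it on the master.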
#Check if root is running the script
if [[ $EUID -ne 0 ]]; then
echo "This script must be run as root"
exit 1
fi
# Parameters
NAGIOS_SCRIPT='/opt/proteon-scripts/minions/nagios/generic/nagios_add_downtime_host.sh'
BASE_LOG_DIR=/var/log/batch_upgrade
TIMESTAMP=$(date +%Y%m%d.%H%M)
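#yum output for the updated host is written to a fresh ${BASE_LOG_DIR}/${TIMESTAMP}.XXXX directory (see update())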
#set the master host
read -p "Enter OpenShift master: " OS_MASTER
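#All oc commands below are run on this master over SSH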
#Set node variables
OS_NODES_READY=$(ssh -n "$OS_MASTER" "oc get nodes" | awk '$2 == "Ready" {print $1}')
COUNT_OF_OS_NODES_DRAINED=$(ssh -n "$OS_MASTER" "oc get nodes" | grep -E -c "NotReady|SchedulingDisabled")
#TODO what if there is more than one node already drained?
if [[ "$COUNT_OF_OS_NODES_DRAINED" -gt 0 ]] ; then
echo -e "n\ NOTE: There are driand nodes on the cluster\n
Please chack the master node for possible issues"
exit 1
fi
###############################OPENSHIFT PATCHING BEGINS HERE###############################
#list nodes on the OpenShift Master
function list_nodes()
{
printf '%s\n' "${OS_NODES_READY[@]}"
set_update_host
}
#set the hostname for the update
function set_update_host()
{
echo "Set the OpenShift node that needs to updated or Press ^c to bailout/finish"
read -p "Enter OpenShift node: " HOST_TO_UPDATE
}
#Break on an error
function pause_on_error {
if [ $1 -ne 0 ]
then
echo -e "\e[31mThe last command resulted in an error.\e[0m Press ^c to bailout or enter to continue"
read
fi
}
#Add nagios downtime
function nagios_downtime()
{
echo "Adding downtime for host $HOST_TO_UPDATE..."
$NAGIOS_SCRIPT --host=$HOST_TO_UPDATE --comment='Batch Updating Host'
pause_on_error $?
}
#just a counter, roughly one minute
function time_counter()
{
# 120 iterations of a 0.5 second sleep ~= 60 seconds; print the elapsed seconds in place
count=0
total=120
while [ $count -lt $total ]; do
sleep 0.5
count=$(( count + 1 ))
pd=$(( count * 60 / total ))
printf '\r%ss' "${pd}"
done
printf '\n'
}
#Drain the node
function drain()
{
echo "Draining $HOST_TO_UPDATE"
ssh -n $OS_MASTER "oc adm drain --ignore-daemonsets $HOST_TO_UPDATE"
rc=$?
echo "
 __________________________________________________________________________
< This will take a minute, just making sure that the node is fully drained >
 --------------------------------------------------------------------------
        \   ^__^
         \  (oo)\_______
            (__)\       )\/\\
                ||----w |
                ||     ||
"
time_counter
pause_on_error $rc
}
#Update node
function update()
{
mkdir -p ${BASE_LOG_DIR}
LOG_DIR=$(mktemp -d ${BASE_LOG_DIR}/${TIMESTAMP}.XXXX)
echo -e "n\I'm now going to update $HOST_TO_UPDATE."
echo "You can follow/check logs in:"
echo ${LOG_DIR}
echo "Use for instance 'tail -f ${LOG_DIR}/$HOST_TO_UPDATE' in a different terminal after continuing here."
echo "If any fail you MUST check and update these before proceeding!"
echo -e "\e[33mPress ^c to bailout or enter to continue\e[0m"
read
echo "This may take some time..."
echo "Please check the packge list, if docker/openshift/atomic are incluted than, STOP the update by reapling N to yum or Press ^c "
#exclude openshift and docker
ssh -n $HOST_TO_UPDATE "cat /etc/yum.conf | grep -v exclude >/tmp/yum.conf && echo "exclude= docker* atomic* " >> /tmp/yum.conf && cat /tmp/yum.conf >/etc/yum.conf"
#update command
ssh -t $HOST_TO_UPDATE "sleep 10 && yum update --security" | tee ${LOG_DIR}/$HOST_TO_UPDATE
pause_on_error $?
}
#reboot node
function reboot_server()
{
LOG_DIR=$(mktemp -d ${BASE_LOG_DIR}/${TIMESTAMP}.XXXX)
echo "Restarting server $HOST_TO_UPDATE"
ANSWER=''
while [[ -z "${ANSWER}" || ( "${ANSWER}" != 'c' && "${ANSWER}" != 's' ) ]]; do
echo -e "\e[33mPress 'c' to continue, 's' to skip, or ^c to bail out...\e[0m"
read ANSWER
if [[ "${ANSWER}" == 's' ]]; then
echo "Skipping $HOST_TO_UPDATE"
return 0
fi
if [[ "${ANSWER}" == 'c' ]]; then
echo "Proceeding to reboot $HOST_TO_UPDATE"
break
fi
done
timeout 4 ssh -o ConnectTimeout=3 $HOST_TO_UPDATE "reboot"
# no pause on reboot, because you often get kicked off the server too quickly, causing a non-zero exit code
# pause_on_error $?
# wait until the server is down
ssh -o ConnectTimeout=2 $HOST_TO_UPDATE true
rc=$?
while [[ $rc == 0 ]]
do
echo "Waiting for $HOST_TO_UPDATE to be down..."
ssh -o ConnectTimeout=2 $HOST_TO_UPDATE true
rc=$?
sleep 0.5
done
echo "Server $HOST_TO_UPDATE appears to be down..."
# wait until the server accepts SSH connections again
ssh -o ConnectTimeout=2 $HOST_TO_UPDATE true
rc=$?
while [[ $rc != 0 ]]
do
echo "Waiting for $HOST_TO_UPDATE to be back up..."
ssh -o ConnectTimeout=2 $HOST_TO_UPDATE true
rc=$?
sleep 5
done
}
#uncordon node in the OpenShift master
function uncordon()
{
echo "------------------------------------"
echo "Reactivating $HOST_TO_UPDATE in the OpenShift master $OS_MASTER"
ssh -n $OS_MASTER "oc adm uncordon $HOST_TO_UPDATE"
pause_on_error $?
list_nodes
}
############################### -MAIN RUN- ###############################
list_nodes
nagios_downtime
drain
update
reboot_server
uncordon
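#Example session (hostnames are hypothetical):
#  Enter OpenShift master: master01.example.com
#  node01.example.com
#  node02.example.com
#  Set the OpenShift node that needs to be updated or press ^c to bailout/finish
#  Enter OpenShift node: node01.example.com
#  ...downtime is added, the node is drained, patched, rebooted, and uncordoned...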