#!/bin/bash
# Author             : Roy Cohen <roy@wondercohen.nl>
# Objective          : Gluster checks for Nagios
# First line of code : 15/01/2019
# Last update        : 17/01/2019
# Version            : 0.1
# Status             : Not PRD ready
#
# Runs four checks against the local Gluster node and prints one status line
# per check:
#   1. glusterd (management daemon) is running
#   2. glusterfsd (brick daemon) is running
#   3. the number of glusterfsd processes equals the number of local bricks
#   4. no local volume has pending heal entries
# The script finishes with the worst status found, using the Nagios plugin
# return codes (0 OK, 1 WARNING, 2 CRITICAL, 3 UNKNOWN).
#
# Environment:
#   STORAGE_IFACE  network interface of the storage network (default: storage)
#                  TODO: osg servers

# Unset variables are programming errors. 'set -e' is deliberately not used:
# a failing check must be reported, not abort the remaining checks.
set -u

# Nagios plugin return codes.
readonly STATE_OK=0
readonly STATE_WARNING=1
readonly STATE_CRITICAL=2
readonly STATE_UNKNOWN=3

# Interface whose IPv4 address identifies this server in the brick list.
STORAGE_IFACE=${STORAGE_IFACE:-storage}

# State shared by the checks; filled in by collect_gluster_state.
# An empty value means "could not be determined".
STORAGE_IP=""                  # server IP address in the storage network
NUMBER_OF_ACTIVE_BRICKS=""     # bricks of this server according to gluster
GLUSTER_ACTIVE_VOLUM_NAMES=""  # volumes of this server, one name per line
GLUSTER_BRICK_NUMBER_PID=""    # number of running glusterfsd processes

###### START OF SCRIPT #######

#######################################
# Queries ip, gluster and pidof once and stores the results in the globals.
# Globals:   STORAGE_IFACE (read); STORAGE_IP, NUMBER_OF_ACTIVE_BRICKS,
#            GLUSTER_ACTIVE_VOLUM_NAMES, GLUSTER_BRICK_NUMBER_PID (written)
# Returns:   0 on success, 1 when the storage IP or the volume info is
#            unavailable (the affected globals are then left empty)
#######################################
collect_gluster_state() {
  local info bricks

  STORAGE_IP=""
  NUMBER_OF_ACTIVE_BRICKS=""
  GLUSTER_ACTIVE_VOLUM_NAMES=""
  GLUSTER_BRICK_NUMBER_PID=$(pidof glusterfsd 2>/dev/null | wc -w)

  # First IPv4 address of the storage interface, without the prefix length.
  STORAGE_IP=$(
    ip -4 addr show dev "$STORAGE_IFACE" 2>/dev/null |
      awk '$1 == "inet" { sub(/\/.*/, "", $2); print $2; exit }'
  )
  [[ -n "$STORAGE_IP" ]] || return 1

  info=$(gluster volume info </dev/null 2>/dev/null) || return 1

  # Keep only the lines naming a brick on this server ("<ip>:/path").
  # Fixed-string matching: the dots of the address must not act as regex
  # wildcards, and 10.0.0.1 must not match 10.0.0.10 or 110.0.0.1.
  bricks=$(
    awk -v needle="${STORAGE_IP}:/" '
      {
        pos = index($0, needle)
        if (pos == 0) next
        if (pos > 1 && substr($0, pos - 1, 1) ~ /[0-9.]/) next
        print
      }' <<<"$info"
  )

  NUMBER_OF_ACTIVE_BRICKS=$(grep -c . <<<"$bricks")

  # The volume name is taken from the third "/"-separated field of the brick
  # line. NOTE(review): assumes brick paths of the form /<dir>/<volume>/...
  # -- confirm against the brick layout in use.
  # Duplicates (several bricks of one volume) are dropped, order is kept.
  GLUSTER_ACTIVE_VOLUM_NAMES=$(
    cut -d "/" -f3 <<<"$bricks" | awk 'NF && !seen[$0]++'
  )
  return 0
}

#######################################
# Checks that the glusterd management daemon has a running process.
# Outputs:   one status line on stdout
# Returns:   STATE_OK or STATE_CRITICAL
#######################################
check_if_glusterd_is_running() {
  # Test pidof's exit status directly; its output is irrelevant here.
  if pidof glusterd >/dev/null 2>&1; then
    echo "OK: glusterd management daemon is running"
    return "$STATE_OK"
  fi
  echo "CRITICAL: glusterd management daemon not running"
  return "$STATE_CRITICAL"
}

#######################################
# Checks that at least one glusterfsd (brick daemon) process is running.
# Outputs:   one status line on stdout
# Returns:   STATE_OK or STATE_CRITICAL
#######################################
check_if_glusterfsd_is_running() {
  if pidof glusterfsd >/dev/null 2>&1; then
    echo "OK: glusterfsd brick daemon is running"
    return "$STATE_OK"
  fi
  echo "CRITICAL: glusterfsd brick daemon not running"
  return "$STATE_CRITICAL"
}

#######################################
# Compares the number of local bricks with the number of glusterfsd processes.
# Globals:   NUMBER_OF_ACTIVE_BRICKS, GLUSTER_BRICK_NUMBER_PID (read)
# Outputs:   one status line on stdout
# Returns:   STATE_OK, STATE_CRITICAL, or STATE_UNKNOWN when the brick list
#            could not be determined
#######################################
check_if_a_brick_is_down() {
  if [[ -z "$NUMBER_OF_ACTIVE_BRICKS" || -z "$GLUSTER_BRICK_NUMBER_PID" ]]; then
    echo "UNKNOWN: could not determine the bricks of this server"
    return "$STATE_UNKNOWN"
  fi

  # NOTE(review): assumes one glusterfsd process per brick; if several bricks
  # can share one process this comparison does not hold -- confirm.
  if ((NUMBER_OF_ACTIVE_BRICKS == GLUSTER_BRICK_NUMBER_PID)); then
    echo "OK: all bricks are active"
    return "$STATE_OK"
  fi
  echo "CRITICAL: There is one or more bricks down"
  return "$STATE_CRITICAL"
}

#######################################
# Reads "gluster volume heal <volume> info" for every local volume and
# reports the volumes that have pending heal entries.
# Globals:   NUMBER_OF_ACTIVE_BRICKS, GLUSTER_ACTIVE_VOLUM_NAMES (read)
# Outputs:   one status line on stdout
# Returns:   STATE_OK, STATE_WARNING (entries pending, or a count that is
#            not a number), or STATE_UNKNOWN (volume list or heal info
#            unavailable)
#######################################
check_heal_status_of_a_volume() {
  local -a volumes=()
  local volume total odd lines
  local healing="" unreadable=""

  if [[ -z "$NUMBER_OF_ACTIVE_BRICKS" ]]; then
    echo "UNKNOWN: could not determine the volumes of this server"
    return "$STATE_UNKNOWN"
  fi

  mapfile -t volumes <<<"$GLUSTER_ACTIVE_VOLUM_NAMES"
  for volume in "${volumes[@]}"; do
    [[ -n "$volume" ]] || continue

    # Per volume: sum of the numeric "entries" counts, number of counts that
    # are not a plain number, and number of "entries" lines seen at all.
    total=0 odd=0 lines=0
    read -r total odd lines < <(
      gluster volume heal "$volume" info </dev/null 2>/dev/null |
        awk -F: '
          /entries/ {
            lines++
            value = $2
            gsub(/[ \t\r]/, "", value)
            if (value ~ /^[0-9]+$/) total += value
            else odd++
          }
          END { printf "%d %d %d\n", total, odd, lines }'
    )

    if ((lines == 0)); then
      # No heal information at all: do not mistake silence for "healthy".
      unreadable="${unreadable:+$unreadable, }$volume"
    elif ((odd > 0)); then
      healing="${healing:+$healing, }$volume ($total entries, $odd brick(s) without a count)"
    elif ((total > 0)); then
      healing="${healing:+$healing, }$volume ($total entries)"
    fi
  done

  if [[ -n "$healing" ]]; then
    echo "WARNING: volumes healing at this moment $healing"
    return "$STATE_WARNING"
  fi
  if [[ -n "$unreadable" ]]; then
    echo "UNKNOWN: no heal information available for $unreadable"
    return "$STATE_UNKNOWN"
  fi
  echo "OK: no volumes healing at this moment"
  return "$STATE_OK"
}

#######################################
# Runs every check and combines the results.
# Outputs:   the status line of each check on stdout
# Returns:   the worst state seen; CRITICAL outranks WARNING, which outranks
#            UNKNOWN, which outranks OK
#######################################
main() {
  local worst=$STATE_OK
  local check rc

  # A failure here is not fatal: the checks that need the missing data
  # report UNKNOWN themselves.
  collect_gluster_state || true

  for check in \
    check_if_glusterd_is_running \
    check_if_glusterfsd_is_running \
    check_if_a_brick_is_down \
    check_heal_status_of_a_volume; do
    rc=0
    "$check" || rc=$?
    if ((rc == STATE_CRITICAL)); then
      worst=$STATE_CRITICAL
    elif ((rc == STATE_WARNING && worst != STATE_CRITICAL)); then
      worst=$STATE_WARNING
    elif ((rc == STATE_UNKNOWN && worst == STATE_OK)); then
      worst=$STATE_UNKNOWN
    fi
  done

  return "$worst"
}

# Ideas for further checks (not implemented):
#   some logs test
#   cat /var/log/glusterfs/glusterd.log|grep "E" |grep "failed"
#   oc rsh -n glusterfs $(oc get pods -n glusterfs | grep Running | grep 'glusterfs-storage' | head -n 1 | cut -d' ' -f1) gluster volume status all

#### CALL THE FUNCTIONS ######
# main is the last command of the script, so its return value becomes the
# exit status that Nagios evaluates.
main "$@"