How to check the health of database instances
This script checks the named instances, or all instances listed in the oratab file with a Y in the third field.
########################################################
#
# Synopsis: db_check.sh [-b] [sid ...]
# Purpose: to check the health of database instances
#
# Author: Steve Adams (based on various similar scripts)
#
# Description: This script checks the named instances, or all instances
# listed in the oratab file with a Y in the third field.
# The -b option should be used during the backup window.
#
# It may be used in 3 ways:
# * interactively
# * from cron
# * from other scripts
#
# The following checks are performed:
# * a listener must be running
# * each instance must be available
# * instances must not be in resticted session mode
# * auto-archiving must be enabled in archivelog mode
# * archiver processes must not be stuck
# * tablespaces must not be in hot backup mode
# During the backup window, instances may be down, or in
# restricted session mode, and tablespaces may be in hot
# backup mode.
#
# If any checks fail, they are reported:
# * to the terminal, if running interactively
# * to the system log via logger (syslog)
# * with a non-zero exit status
#
########################################################
# set variables
#
export PROGRAM=${0##*/} # program name
export ORATAB # path to the oratab file
export INTERACTIVE=: # whether to print messages to stdout
export DEBUG= # whether to actually log messages
# export DEBUG=print # (uncomment this line while debugging)
export FACILITY=oracle # change to "user" if logger complains
export BACKUP=false # are we in the backup window
export ORACLE_SID= # the instance being checked
export SPOOL=/tmp/$$.spool # the spool file for output
export READY=/tmp/$$.ready # file to flag that output is ready
export STATUS=0 # exit status
# find the oratab file
# if none, assume no Oracle and quit
#
{ ORATAB=/etc/opt/oracle/oratab && [[ -r $ORATAB ]] ; } ||
{ ORATAB=/var/opt/oracle/oratab && [[ -r $ORATAB ]] ; } ||
{ ORATAB=/etc/oratab && [[ -r $ORATAB ]] ; } ||
exit 0
# are we running interactively?
# if not, we must have logger
#
tty -s && INTERACTIVE=print || whence logger > /dev/null || exit 1
# check that a listener is running
#
if ps -fu oracle | grep -v grep | grep -c tnslsnr >/dev/null
then
$INTERACTIVE "Listener is running"
else
msg="$PROGRAM: Listener not running - no network access to Oracle"
$DEBUG logger -p $FACILITY.err "$msg"
STATUS=1
$INTERACTIVE $msg
fi
# check for oraenv
#
whence oraenv > /dev/null ||
{
msg="$PROGRAM: Cannot check Oracle - oraenv not in PATH"
$DEBUG logger -p $FACILITY.warning "$msg"
$INTERACTIVE $msg
exit 1
}
# are we in the backup window
#
[[ $1 = -b ]] && { BACKUP=true; shift; }
# check specified instances or all auto-started instances
#
for ORACLE_SID in ${*-$(awk -F: '!/^#/ && $3=="Y" {print $1}' $ORATAB)}
do
# set environment and check for sqlplus
#
ORAENV_ASK=NO . oraenv
whence sqlplus > /dev/null ||
{
msg="$PROGRAM: Cannot check Oracle - sqlplus not in PATH"
$DEBUG logger -p $FACILITY.warning "$msg"
STATUS=1
$INTERACTIVE $msg
continue
}
# check connectivity for an ordinary user
# (we expect an ORA-01017 error; most others mean something)
#
rm -f $READY
print "
connect nobody/really
host touch $READY
exit " |
sqlplus /nolog > $SPOOL &
# wait for up to 59 seconds
#
((timeout = 60))
while ((timeout -= 1)) && [[ ! -r $READY ]]
do
sleep 1
done
# check for hang
#
[[ -r $READY ]] ||
{
kill $!
msg="$PROGRAM: Oracle instance $ORACLE_SID is not responding"
$DEBUG logger -p $FACILITY.err "$msg"
STATUS=1
$INTERACTIVE $msg
continue
}
# check for other problems
#
ERROR=$(sed -n 's/.*ORA-\([0-9]*\):.*/\1/p' $SPOOL|head -1)
case $ERROR in
#
# this is what we expect
#
01017) # invalid username/password
;;
#
# this group are not expected, but OK anyway
#
"") # no error (connected OK)
;;
01035) # instance in restricted session mode
;;
01040) # invalid password
;;
01045) # no create session priv
;;
#
# any other error is a problem
#
00257) # archiver stuck
#
msg="$PROGRAM: Oracle archiver for $ORACLE_SID is stuck"
$DEBUG logger -p $FACILITY.alert "$msg"
STATUS=1
$INTERACTIVE $msg
;;
01034) # Oracle not available
#
msg="$PROGRAM: Oracle instance $ORACLE_SID is not up"
$BACKUP || $DEBUG logger -p $FACILITY.err "$msg"
$BACKUP || STATUS=1
$BACKUP || $INTERACTIVE $msg
continue
;;
*) # something else
#
msg="$PROGRAM: Got Oracle error ORA-$ERROR from $ORACLE_SID"
$DEBUG logger -p $FACILITY.err "$msg"
STATUS=1
$INTERACTIVE $msg
continue
;;
esac
$INTERACTIVE "Oracle instance $ORACLE_SID is up"
# now connect internal and check everything else
#
{ SVRMGRL=svrmgrl && whence $SVRMGRL > /dev/null ; } ||
{ SVRMGRL=sqldba && whence $SVRMGRL > /dev/null ; } ||
{
msg="$PROGRAM: Cannot check Oracle - svrmgrl not in PATH"
$DEBUG logger -p $FACILITY.warning "$msg"
STATUS=1
$INTERACTIVE $msg
continue
}
rm -f $READY
print "
connect internal
select '/actives=' || count(*) from v\$backup where status = 'ACTIVE';
select '/restrict=' || value from v\$instance where key like 'RESTR%';
select '/restrict=' || logins from v\$instance;
archive log list;
host touch $READY
exit " |
$SVRMGRL > $SPOOL &
# wait for up to 89 seconds
#
((timeout = 90))
while ((timeout -= 1)) && [[ ! -r $READY ]]
do
sleep 1
done
# check for hang (most unlikely at this point)
#
[[ -r $READY ]] ||
{
kill $!
msg="$PROGRAM: Oracle instance $ORACLE_SID is not responding"
$DEBUG logger -p $FACILITY.err "$msg"
STATUS=1
$INTERACTIVE $msg
continue
}
# check for active backups, archiver and restricted session
#
eval $(sed -n 's:^/\(.*=.*\):\1:p' $SPOOL)
[[ $actives -eq 0 ]] ||
{
msg="$PROGRAM: Database $ORACLE_SID has files in hot backup mode"
$BACKUP || $DEBUG logger -p $FACILITY.warning "$msg"
$BACKUP || STATUS=1
$BACKUP || $INTERACTIVE $msg
}
eval $(awk '/Database log mode/ {print "mode=" $(NF-2)}
/Automatic archival/ {print "auto=" $NF}' $SPOOL)
[[ $auto = Enabled || $mode = No ]] ||
{
msg="$PROGRAM: Oracle archiver for $ORACLE_SID is not running"
$DEBUG logger -p $FACILITY.err "$msg"
STATUS=1
$INTERACTIVE $msg
}
[[ $restrict = 0 || $restrict = ALLOWED ]] ||
{
msg="$PROGRAM: Oracle instance $ORACLE_SID is in restricted mode"
$BACKUP || $DEBUG logger -p $FACILITY.warning "$msg"
$BACKUP || STATUS=1
$BACKUP || $INTERACTIVE $msg
}
done
# cleanup
#
$DEBUG rm -f $SPOOL
$DEBUG rm -f $READY
exit $STATUS