#!/bin/bash
# Basic software updater by Marc MERLIN (2003/04/17 - v1.0)
# $Id: //depot/ops/corp/grhat/tree/usr/local/scripts/getupdates#55 $
#
# Usage: getupdates [--firstload] [--sleep] [--unattended]
#   The options are only recognized in that order.
#   --firstload   tag the log entry as NEWINSTALL and run unattended
#   --sleep       sleep a random amount of time before starting
#   --unattended  do not mark stderr output from updates as "seen"
#
# Reads TARGET and PROTOCOL (nfs or http), and optionally any of the defaults
# below, from /etc/sysconfig/getupdates, then fetches the target's "list" file
# and runs every update named in it that has not been run yet (or that has
# been updated since it was last run).
#
# 2003-04-23 - read target from /proc/cmdline (from kickstart)
#            - chown unpacked files to root
# 2003-05-01 - Lots of new code to log output and errors via NFS
# 2003-05-02 - Added locking, random sleep, and other improvements
# 2003-05-08 - Lock after the sleep, look for updatedate and optionally re-run
#              already run update
# 2003-05-14 - Catch errors in function tools, rotated files are immutable
#              More data logged for each client
# 2003-05-22 - Added more info on machines (cpu/pci), set more log variables
#              to default values and deal with unexpected errors better
#              target dirs have been moved to grhat/targets to reduce clutter
#              Added --firstload to show machines being reloaded
# 2003-06-06 - Added initial http support
#              Added server probe code to exit early
#              Rewrote to be errexit/nounset compliant
# 2003-11-13 - Added trap for dies, and reworked variable passing from boot
#              kernel variables
# 2004-01-09 - External release v1.0 / Cleaned up release for LCA 2004

# Exit if we use any unset variable (use ${VAR:-} to test for a possibly unset
# variable)
set -o nounset

export PATH="/usr/local/scripts:/usr/local/sbin:/usr/local/bin/:/usr/sbin:/usr/bin/:/sbin:/bin:$PATH"

LOCK=/var/lock/getupdates
# Set to a non-empty value once shlock has given us $LOCK, so that the exit
# paths never remove a lock that belongs to another running instance
LOCKED=""
# Messages logged before the real log file is available are kept here
LOGSPOOL=""
LOG=/dev/null
FULLLOG=/dev/null
LOGSAVE=/dev/null
CMDLINE=/proc/cmdline
# For debugging
# NOTE(review): this is a predictable, world-writable location that is parsed
# below for STATBASE/NFSSTATMNT overrides; consider moving it out of /tmp.
CMDLINE2=/tmp/cmdline
touch "$CMDLINE2"


# log [message]
# Prints "<date> LOG: <message>" on stdout and appends it to $LOG.
# As long as $LOG is not a regular file (it starts out as /dev/null), messages
# are accumulated in LOGSPOOL instead and flushed to $LOG by the first call
# made after $LOG became available. Without a message, an empty line is
# output (and nothing is spooled). Always returns 0.
log () {
    local stamp
    stamp=$(date "+%Y/%m/%d %H:%M:%S ($$)")

    # If we haven't opened the log file yet, remember log messages
    if [ ! -f "$LOG" ]; then
        if [ -n "${1:-}" ]; then
            LOGSPOOL="$LOGSPOOL\n$stamp LOG: $1"
            echo "$stamp LOG: $1"
        fi
        return 0
    fi

    if [ -n "$LOGSPOOL" ]; then
        echo -e "$LOGSPOOL" >> "$LOG"
        LOGSPOOL=""
    fi

    if [ -z "${1:-}" ]; then
        echo
        echo >> "$LOG"
    else
        echo "$stamp LOG: $1"
        echo "$stamp LOG: $1" >> "$LOG"
    fi
}

# rotatelog
# If $LOG is a regular file, moves it to $LOGSAVE, keeping the previous
# $LOGSAVE (if any) as $LOGSAVE.0. Does nothing while LOG is /dev/null.
# Failures are ignored on purpose; always returns 0.
rotatelog () {
    [ -f "$LOG" ] || return 0
    if [ -f "$LOGSAVE" ]; then
        /bin/mv "$LOGSAVE" "$LOGSAVE.0" &>/dev/null || true
    fi
    /bin/mv "$LOG" "$LOGSAVE" &>/dev/null || true
}

# die [message]
# Reports a fatal error, appends the current log to $FULLLOG, rotates the log,
# releases the lock if we hold it, and exits with status 1.
die () {
    local stamp msg
    stamp=$(date "+%Y/%m/%d %H:%M:%S ($$)")
    # Log an empty line to trigger LOGSPOOL emptying should LOG have become
    # available
    log ""

    msg=${1:-}
    if [ -n "${LOG:-}" ]; then
        if [ -f "$LOG" ]; then
            # If we log to an NFS server, we'll pick up the error via cron
            echo "$stamp DIE: $msg"
        else
            # If not, let's generate a cron mail
            echo "$stamp DIE: $msg" >&2
        fi
        echo "$stamp DIE: $msg failed" >> "$LOG"
        cat "$LOG" >> "$FULLLOG"
        rotatelog
    fi
    # Only remove the lock if it is ours: die can be called before shlock
    if [ -n "${LOCKED:-}" ]; then
        rm -f "$LOCK" &>/dev/null || true
    fi
    exit 1
}

# Exit if any line returns an error
# Actually trap ERR seems to take over errexit, so no need to define it
#set -o errexit

# Trap errors for logging before we die (so that they can be picked up
# by the log checker)
trap 'die "error line $LINENO with status $?"' ERR

HOSTNAME=$(hostname -f 2>/dev/null | sed 's/\.[^.]*\.[^.]*$//') || true
IP=$(ifconfig eth0 | head -2 | tail -1 | sed -e "s/[^:]*://" -e "s/ .*//") || true

if [ -z "${HOSTNAME:-}" ]; then
    HOSTNAME=$(hostname 2>/dev/null) || true
    log "WARNING: hostname on $HOSTNAME/$IP is a non qualified hostname or is not correctly listed in /etc/hosts, please fix"
fi

if [ -z "${HOSTNAME:-}" ]; then
    HOSTNAME="unknown"
    log "WARNING: Couldn't get hostname on $IP, please fix"
fi

# Who we ping to decide if we're up or not
# The idea is not to generate stderr output (i.e. a cron mail) if we're still
# down
BASECHECKHOST=www.example.com

# This is what we base shortcuts on. Again, this is to save space in the
# kernel parameters, since they're limited to 255 chars
DOMAIN=corp.example.com

# We use short, sucky names because they can be fed from the kernel command
# line, and it has a limit of 255 chars

# TARGD is a common root added to NFSUPDPATH and HTTPUPDPATH
TARGD="/grhat/targets"

# Base access to update info over NFS (TARGD is appended)
# It is important to split this up and mount $NFSHOST:/$NFSUPDPATH and not
# $NFSHOST:/$NFSUPDPATH/$TARGD because some NFS implementations (like ontap
# nfs v4 virtual mounts) will not let you mount more than 1 level deep
NFSHOST="software.nfs"
NFSUPDPATH="/software/"

# Base access to update info over HTTP (TARGD is appended)
HTTPUPDPATH="http://apt/"

# Where we write output
STATBASE="/auto/clientinfo"
# This is mostly for install time status reporting (when /auto automount
# doesn't work)
NFSSTATMNT="clientinfo.nfs:/clientinfo"

if [ -f /etc/sysconfig/getupdates ]; then
    . /etc/sysconfig/getupdates
fi

if [ -z "${WORKDIR:-}" ]; then
    WORKDIR="/var/lib/grhat"
    # While this is a default, we write it to the config file anyway, because
    # it is read by changesets to know where they should write state and where
    # the grhat tmp dir is
    echo 'WORKDIR=/var/lib/grhat' >> /etc/sysconfig/getupdates
fi


# epocdate2str seconds
# Prints the given number of seconds since the epoch as
# "YYYY/MM/DD HH:MM:SS". Prints an empty line if date cannot convert the
# value; always returns 0 so that callers do not trip the ERR trap.
epocdate2str () {
    local stamp
    stamp=$(date -d "1970-01-01 $1 sec UTC" "+%Y/%m/%d %H:%M:%S") || true
    echo "$stamp"
}

# pinghost host
# Returns 0 if host answers fping (or if /bin/fping is not installed),
# non-zero otherwise. If $BASECHECKHOST itself cannot be reached, networking
# is considered down and the script exits 1 without writing to stderr.
pinghost () {
    local host=$1
    local ping=/bin/fping

    if [ ! -x "$ping" ]; then
        log "$ping absent, skipping fping check for $host"
        return 0
    fi

    if ! "$ping" -B5.0 "$BASECHECKHOST" 2>&1; then
        # We don't die and output on stderr, we want to avoid the cron mail
        log "FATAL: networking seems to be down: can't reach $BASECHECKHOST"
        if [ -n "${LOCKED:-}" ]; then
            rm -f "$LOCK" &>/dev/null || true
        fi
        exit 1
    fi

    "$ping" -B5.0 "$host"
}

# getfile [-q] path
# Makes path (relative to $UPDPATH) available in $WORKDIR/tmp under its
# basename: as a symlink for nfs, as a wget download for http (-q is passed on
# to wget). Returns 0 on success and non-zero if the file could not be
# obtained; dies on a missing NFS tree or an unknown protocol.
getfile () {
    local quiet="" destfile err

    if [ "${1:-}" = "-q" ]; then
        quiet=-q
        shift
    fi
    destfile=$(basename "$1")

    case "$PROTOCOL" in
        nfs)
            [ -d "$UPDPATH" ] || die "Protocol NFS configured, but $UPDPATH not found"
            [ -f "$UPDPATH/$1" ] || return 1
            ln -snf "$UPDPATH/$1" "$WORKDIR/tmp/"
            return 0
            ;;
        http)
            # Never delete or download in whatever directory we happen to be in
            pushd "$WORKDIR/tmp/" >/dev/null || return 1
            /bin/rm "$destfile" &>/dev/null || true
            # $quiet is deliberately unquoted: it must vanish when empty
            wget $quiet "$UPDPATH/$1" 2>&1
            err=$?
            popd >/dev/null
            return $err
            ;;
        *)
            die "Unknown protocol $PROTOCOL"
            ;;
    esac
    return 1
}


# Whether there is someone behind the keyboard (in which case we munge stderr
# so that it doesn't get sent back up to the getupdate mail watchers)
ATTENDED="seen|"
NEWINSTALL="NEWENTRY"

if [ "${1:-}" = "--firstload" ]; then
    NEWINSTALL="NEWINSTALL"
    ATTENDED=""
    shift
fi

if [ "${1:-}" = "--sleep" ]; then
    SLEEP=1
    shift
fi

if [ "${1:-}" = "--unattended" ]; then
    ATTENDED=""
    shift
fi

if [ -z "${TARGET:-}" ]; then
    die "Fatal: Can't work without target, couldn't read from /etc/sysconfig/getupdates. You should type something like echo \"TARGET=wksgold\" > /etc/sysconfig/getupdates"
fi

# PROTOCOL normally comes from the config file; it may be unset
case "${PROTOCOL:-}" in
    nfs|http)
        ;;
    *)
        log "LOGWARNING: Don't know what to do with protocol ${PROTOCOL:-}, defaulting back to http"
        PROTOCOL=http
        ;;
esac

# Let the kernel command line override STATBASE and NFSSTATMNT through
# lowercase name=value parameters; "!!" in a value is shorthand for $DOMAIN
for VAR in STATBASE NFSSTATMNT
do
    var=$(echo "$VAR" | tr 'A-Z' 'a-z')
    VALUE=$(cat "$CMDLINE" "$CMDLINE2" | grep "$var=." | tail -1 | sed -e "s/.*$var=//" -e "s/ .*//" -e "s/!!/$DOMAIN/")

    if [ -n "$VALUE" ]; then
        # Horrible hack to support old floppies that pass obsolete and incorrect
        # NFSSTATMNT instead of relying on the default (which now changed)
        if [ "$VAR" = NFSSTATMNT ] && [ "$VALUE" = software:/vol/vol1/software/ ]; then
            log "Ignoring obsolete and incorrect $VAR=$VALUE, please make a newer boot media"
            continue
        fi
        # The escaped \$VALUE makes eval assign the value literally instead of
        # re-parsing text that came from the kernel command line
        eval "$VAR=\$VALUE"
        # Yes, this log only goes to STDOUT and /dev/null, I know :)
        log "Overridding $VAR with $VALUE from kernel boot variables"
        # In theory, if you feed this several times, you'd get duplicates...
        echo "$VAR=$VALUE" >> /etc/sysconfig/getupdates
    fi
done

HTTPHOST=$(echo "$HTTPUPDPATH" | sed 's/http:\/\/\([^/]*\)\/.*/\1/')
NFSSTATUSHOST=$(echo "$NFSSTATMNT" | sed 's/:.*//')


if [ -n "${SLEEP:-}" ]; then
    # This can sleep up to 20mn or so
    SLEEP=$(( RANDOM / 30 ))
    # Yes, this log only goes to STDOUT and /dev/null, I know :)
    log "Sleeping $SLEEP before starting"
    # Sometimes sleep doesn't succeed or is killed (rare, but I've seen it happen)
    sleep "$SLEEP" || true
fi

# shlock does the right thing and grabs a lock for a dead process
# (it checks the PID in the lock file and if it's not there, it
# updates the PID with the value given to -p)
if ! shlock -p $$ -f "$LOCK"; then
    PID="$(cat "$LOCK" 2>/dev/null)"
    # We're not calling die, because it would remove the lock file from the other process
    msg="Lock $LOCK held by running process $PID, quitting"
    DATE=$(date "+%Y/%m/%d %H:%M:%S ($$)")
    # To stdout to go to /var/log/getupdates or stdout if run from command line
    echo "$DATE DIE: $msg"
    exit 1
fi
LOCKED=1

if [ -n "${NFSSTATUSHOST:-}" ]; then
    # At install time, we don't have autofs
    if [ ! -d "$STATBASE" ]; then
        if ! mkdir -p "$STATBASE" 2>&1; then
            # If we make this a warn, we get a mail to know about it, but
            # it can be spammy. With log, it only goes in the local logs
            # and never gets reported since we're failing to mount the
            # clientinfo log dir :)
            # (actually we will notice that the machine is "down" after 15 days
            # of its logs not reaching us anymore)
            log "LOGWARNING: Can't create $STATBASE, autofs is probably running but failing to automount"
        else
            mount -o nolock "$NFSSTATMNT" "$STATBASE" || log "LOGWARNING: Could not mount $NFSSTATMNT"
        fi
    fi

    if mount | grep -qF -- "$STATBASE"; then
        # The MAC address is the 5th word of the first line of ifconfig output
        MAC=$(ifconfig eth0 | awk 'NR == 1 { print $5 }') || true
        if [ -z "$MAC" ]; then
            die "Can't determine MAC address of eth0"
        fi

        # This might already exist, or mkdir might fail
        test -d "$STATBASE/$MAC" || mkdir "$STATBASE/$MAC" || true
        # Directory group is going to be changed from nfsnobody to ops
        # by a cronjob on stan, so it ends up being unreadable for
        # us. We want that.
        chmod 350 "$STATBASE/$MAC" || true

        # This might fail if nfs is not working for some reason. It has to be
        # tested directly: a bare failing cd would fire the ERR trap.
        if cd "$STATBASE/$MAC"; then
            # Do not exit if we get any NFS write error
            trap '' ERR
            echo "$IP" > ip
            echo "$HOSTNAME" > hostname
            echo "$TARGET" > target
            ifconfig -a > ifconfig-a
            route -n > route-n
            cat /etc/ssh/ssh_host_{,dsa_,rsa_}key.pub > ssh_hostkeys.pub 2>/dev/null
            # don't hang in case we have a bad NFS mount
            df > df 2>/dev/null &
            (date; echo "--------"; who | grep -E ' (tty|:0)'; echo ) >> who
            OWNER=$(who | grep -E ' (tty|:0)' | head -1 | sed "s/ .*//")
            if [ -n "$OWNER" ]; then
                echo "$OWNER" > owner
            fi
            uname -a > uname-a
            free > free
            cat /proc/cpuinfo > cpuinfo
            # user mode linux doesn't have /proc/bus/pci
            lspci 2>/dev/null > lspci
            ps auxww > ps-auxww
            rpm -qa | sort > rpm-qa
            if [ -f /etc/sysconfig/rhn/up2date-uuid ]; then
                grep ^rhnuuid /etc/sysconfig/rhn/up2date-uuid > uuid
            fi

            # From now on, log() writes to the per-machine log on the server
            LOG=$STATBASE/$MAC/log.update
            LOGSAVE=$STATBASE/$MAC/log.last
            FULLLOG=$STATBASE/$MAC/log

            # A non-empty log.update is a leftover from a run that never got
            # to rotate it
            if [ -s "$LOG" ]; then
                DATE=$(date "+%Y/%m/%d %H:%M:%S ($$)")
                log "$DATE LOGWARNING: File didn't get appended to log at last run"
                cat "$LOG" >> "$FULLLOG"
                /bin/mv "$LOGSAVE" "$LOGSAVE.0" &>/dev/null || true
                /bin/mv "$LOG" "$LOGSAVE"
            fi

            DATE=$(date "+%Y/%m/%d %H:%M:%S ($$)")
            echo -e "\n\n$DATE $NEWINSTALL: $IP/$HOSTNAME\n--------------------------------------------------------------------------------" >> "$LOG"
            trap 'die "error line $LINENO with status $?"' ERR
        fi

        # Make the machine's directory reachable by IP and by hostname too
        cd "$STATBASE"
        ln -snf "$MAC" "$IP"
        ln -snf "$MAC" "$HOSTNAME"
        # We don't want to stay in /auto/clientinfo, it's unreadable
        cd "$WORKDIR"
    fi

    # Hopefully we won't have anything on stderr if this script isn't buggy
    # but should it happen...
    if [ -z "$ATTENDED" ] && [ "$LOG" != /dev/null ]; then
        exec 2>>"$LOG"
    fi
fi

if [ "$PROTOCOL" = nfs ]; then
    if ! pinghost "$NFSHOST"; then
        # Only generate stderr and a cron error mail if we aren't NFS logging
        if [ "$LOG" != /dev/null ]; then
            log "FATAL: PING: Can't ping $NFSHOST (was configured for NFS)"
            die
        else
            die "PING: Can't ping $NFSHOST (was configured for NFS)"
        fi
    fi
    UPDPATH="$NFSUPDPATH/$TARGD/$TARGET"
elif [ "$PROTOCOL" = http ]; then
    if ! pinghost "$HTTPHOST"; then
        # Only generate stderr and a cron error mail if we aren't NFS logging
        if [ "$LOG" != /dev/null ]; then
            log "FATAL: PING: Can't ping $HTTPHOST (was configured for HTTP)"
            die
        else
            die "PING: Can't ping $HTTPHOST (was configured for HTTP)"
        fi
    fi
    UPDPATH="$HTTPUPDPATH/$TARGD/$TARGET"
fi

log "Syncing against target $UPDPATH via $PROTOCOL"

if [ "$EUID" -ne 0 ]; then
    die "$0 needs to run as root"
fi

if [ ! -d "$WORKDIR/tmp" ]; then
    mkdir -p "$WORKDIR/tmp" || die "Can't mkdir $WORKDIR/tmp"
fi
if [ ! -d "$WORKDIR/work" ]; then
    mkdir -p "$WORKDIR/work" || die "Can't mkdir $WORKDIR/work"
fi
if [ ! -d "$WORKDIR/state" ]; then
    mkdir -p "$WORKDIR/state" || die "Can't mkdir $WORKDIR/state"
fi
if [ ! -f "$WORKDIR/state/changedfiles" ]; then
    touch "$WORKDIR/state/changedfiles" || die "Can't create $WORKDIR/state/changedfiles"
fi

cd "$WORKDIR"
getfile "list" || die "Could not retreive master list file from \"$UPDPATH\" via $PROTOCOL"
mv "$WORKDIR/tmp/list" .

# There is a weird shell interaction bug, so let's slurp the file first
# and then replay the tag values
TAGS=""
# "|| [ -n ... ]" keeps a last line that has no trailing newline
while read -r line || [ -n "$line" ]
do
    # Skip comments
    case "$line" in
        \#*) continue ;;
    esac
    TAGS="$TAGS $line"
done < "$WORKDIR/list"
/bin/rm "$WORKDIR/list"

cd "$WORKDIR/tmp"
# $TAGS is deliberately unquoted: it is a whitespace separated list of names
for UPDNAME in $TAGS
do
    # $WORKDIR/state/<update> holds the time (seconds since the epoch) at which
    # the update was last run successfully
    if [ -f "$WORKDIR/state/$UPDNAME" ]; then
        if getfile -q "$UPDNAME/updatedate"; then
            LASTRUN=$(cat "$WORKDIR/state/$UPDNAME")
            if [ -z "${LASTRUN:-}" ]; then
                LASTRUN=0
            fi
            UPDDATE=$(cat updatedate)
            [ -n "$UPDDATE" ] || die "Empty updatedate for $UPDNAME"
            LASTRUNS=$(epocdate2str "$LASTRUN")
            UPDDATES=$(epocdate2str "$UPDDATE")

            if [ "$LASTRUN" -lt "$UPDDATE" ]; then
                log "Running update $UPDNAME, last done at $LASTRUNS and updated at $UPDDATES"
            else
                log "Not running update $UPDNAME, already done at $LASTRUNS (update dates to $UPDDATES)"
                continue
            fi
        else
            log "Not running update $UPDNAME, already done"
            continue
        fi
    else
        log "Getting and running update $UPDNAME"
    fi

    if [ -d files ]; then
        rm -rf files
    fi

    getfile -q "$UPDNAME/runme" || die "Error, nothing to do for $UPDNAME. No $UPDPATH/$UPDNAME/runme"
    # We want this for http, but not nfs, since it's a symlink to a RO FS
    chmod 755 runme &>/dev/null || true
    # Not getting this isn't a fatal error, it may not be required by
    # runme. Note that getfile will delete the previous file that could be
    # on disk
    getfile "$UPDNAME/workfiles.tar.bz2" && tar xjf workfiles.tar.bz2

    # I think removing the trap on ERR isn't needed because we run a subshell,
    # but better be safe than sorry :)
    trap '' ERR
    # runme's stdout and stderr are swapped (through fd 3) so that only its
    # stderr goes through sed; both end up in tee. Its exit status is saved in
    # a file since it cannot be read from the pipeline.
    # We prefix stderr output for grepping in the logs, unless the said
    # output is known and prefixed with ok-> (in which case we strip that)
    ( ( ./runme "$UPDNAME" 3>&2 2>&1 1>&3 ; echo $? >"$WORKDIR/tmp/err" ) | sed -e "s/^/stderr->/" -e "s/^stderr->ok->//" -e "s/^stderr->/${ATTENDED}stderr->/" ) 2>&1 | tee -a "$LOG"
    errlev=$(cat "$WORKDIR/tmp/err")
    trap 'die "error line $LINENO with status $?"' ERR

    if [ "$errlev" != 0 ]; then
        die "$UPDNAME/runme failed to run properly (error $errlev), aborting..."
    fi

    # Report anything that is still present under files/ after runme ran
    if [ -d files ] && [ "$(find files -type f | wc -l)" -gt 0 ]; then
        echo "FILEWARN: files left behind after running $UPDNAME" | tee -a "$LOG"
        find files -type f | sed "s/^files/FILEWARN: /" | tee -a "$LOG"
    fi
    log
    rm -rf files || true

    date "+%s" > "$WORKDIR/state/$UPDNAME"
done

cat "$LOG" >> "$FULLLOG"
rotatelog