#!/bin/bash
#
# Basic software updater by Marc MERLIN (2003/04/17 - v1.0)
# Portions Copyright Google, Inc
# License: GPL v2
#
# $Id: //depot/ops/corp/goobian/files/usr/local/scripts/getupdates#3 $
#
# 2003-04-23 - read target from /proc/cmdline (from kickstart)
#            - chown unpacked files to root
# 2003-05-01 - Lots of new code to log output and errors via NFS
# 2003-05-02 - Added locking, random sleep, and other improvements
# 2003-05-08 - Lock after the sleep, look for updatedate and optionally re-run
#              already run update
# 2003-05-14 - Catch errors in function tools, rotated files are immutable
#              More data logged for each client
# 2003-05-22 - Added more info on machines (cpu/pci), set more log variables
#              to default values and deal with unexpected errors better
#              target dirs have been moved to goobian/targets to reduce clutter
#              Added --firstload to show machines being reloaded
# 2003-06-06 - Added initial http support
#              Added server probe code to exit early
#              Rewrote to be errexit/nounset compliant
# 2003-11-13 - Added trap for dies, and reworked variable passing from boot
#              kernel variables
# xxxx-xx-xx - other many changes recorded p4
# 2004-01-09 - External release v1.0 / Cleaned up release for LCA 2004
# 2005-02-16 - Second outside release
# 2005-06-14 - Major reorg/cleanup
#
# Overview of what this file does, in order:
#   1. defines log/die helpers and an ERR trap that routes failures to die
#   2. records machine information and opens the per-client log under
#      $STATBASE/mac/$MAC
#   3. fetches the master "list" file and, for every update named in it,
#      fetches and runs (or, in FAKEUPDATE mode, diffs) its "runme"
#   4. rotates the per-run log and removes the lock file

# Exit if we use any unset variable (use ${VAR:-} to test for possibly unset var)
set -o nounset

export PATH="/usr/local/scripts:/usr/local/sbin:/usr/local/bin/:/usr/sbin:/usr/bin/:/sbin:/bin:$PATH"

LOCK=/var/lock/getupdates
# Messages logged before the real log file exists are spooled here
LOGSPOOL=""
# These three stay at /dev/null until the client-info directory is available
LOG=/dev/null
FULLLOG=/dev/null
LOGSAVE=/dev/null

# Get the functions and variables, which in turn parses
# /etc/sysconfig/getupdates and gives us all the default variables we may need
. /var/lib/getupdates/funcs

# We then override the log and die functions with our own

#######################################
# Print a timestamped message on stdout and append it to $LOG.
# While $LOG is not yet a regular file, messages are kept in LOGSPOOL and
# flushed to $LOG the first time log is called after $LOG becomes available.
# Globals:   LOG (read), LOGSPOOL (read/written), DATE (written)
# Arguments: $1 - message; empty or missing logs a blank line
# Returns:   0 when the message was only spooled
#######################################
log () {
    DATE=$(date "+%Y/%m/%d %H:%M:%S ($$)")

    # If we haven't opened the log file yet, remember log messages
    if [ ! -f "$LOG" ]; then
        if [ -n "${1:-}" ]; then
            LOGSPOOL="$LOGSPOOL\n$DATE LOG: $1"
            echo "$DATE LOG: $1"
        fi
        return 0
    elif [ -n "$LOGSPOOL" ]; then
        # Quoted so that spacing inside spooled messages is preserved and
        # glob characters are not expanded; -e turns the "\n" separators
        # into real newlines
        echo -e "$LOGSPOOL" >> "$LOG"
        LOGSPOOL=""
    fi

    if [ -z "${1:-}" ]; then
        echo
        echo >> "$LOG"
    else
        echo "$DATE LOG: $1"
        echo "$DATE LOG: $1" >> "$LOG"
    fi
}

#######################################
# Log a fatal error, archive the per-run log, drop the lock and exit 1.
# Globals:   LOG, FULLLOG, LOGSAVE, LOCK, IP, MAC (read), DATE (written)
# Arguments: $1 - error message
# Outputs:   the error on stdout when $LOG is a regular file, else on stderr
#######################################
die () {
    local msg more_info line

    # don't loop on ERR
    trap '' ERR

    DATE=$(date "+%Y/%m/%d %H:%M:%S ($$)")

    # Log an empty line to trigger LOGSPOOL emptying should LOG have become
    # available
    log ""

    msg=${1:-}
    more_info="IP:${IP:-UNSET} MAC:${MAC:-UNSET}"

    if [ -n "${LOG:-}" ]; then
        # If we log to an NFS server, we'll pick up the error via cron
        if [ -f "$LOG" ]; then
            echo "$DATE DIE: $msg"
            echo "$DATE DIE: $more_info"
        # If not, let's generate a cron mail (i.e. stderr which then gets
        # picked up by tron)
        else
            echo "$DATE DIE: $msg" >&2
            echo "$DATE DIE: $more_info" >&2
        fi

        echo "$DATE DIE: $msg failed" >> "$LOG"
        echo "$DATE DIE: $more_info" >> "$LOG"

        cat "$LOG" >> "$FULLLOG"
        if [ -f "$LOGSAVE" ]; then
            if [ -f "$LOG" ]; then
                /bin/mv "$LOGSAVE" "$LOGSAVE.0" &>/dev/null || true
                # Just making really sure that LOG != /dev/null
                /bin/mv "$LOG" "$LOGSAVE" &>/dev/null || true
            fi
        fi
    fi

    # This is a nice and fancy shell core dumper
    if echo "$msg" | grep -q 'error line .* with status'; then
        line=$(echo "$msg" | sed 's/.*error line \(.*\) with status.*/\1/')
        # prepending DIE allows for the log watcher to report this message
        # back in the hourly cron mail
        echo " DIE: Code dump:"
        # Anchor on nl's leading line-number column so that only the failing
        # line (plus 3 lines of context) is shown, not every source line that
        # happens to contain the same number
        nl -ba "$0" | grep -C 3 "^ *$line\b" | sed "s/^/ DIE: /"
    fi

    rm "$LOCK" &>/dev/null || true
    exit 1
}

# Trap errors for logging before we die (so that they can be picked up
# by the log checker)
trap 'die "error line $LINENO with status $?"' ERR

usage() {
    # NOTE(review): the usage text (a here-document) is missing from this copy
    # of the file; only the leading "cat <" survived. Restore the original
    # text from revision control.
    cat <<EOF
EOF
}

# NOTE(review): the code that originally sat between usage() and the
# client-info setup below is missing from this copy (surviving residue:
# "cat <&1; then"). The changelog above mentions locking, a random sleep and a
# --firstload option, none of which appear in the surviving text, so they
# presumably lived here -- TODO restore from revision control. Variables used
# below but not assigned anywhere in the surviving text (STATBASE, NFSSTATMNT,
# MAC, IP, TARGET, SNAP, NEWINSTALL, WORKDIR, ATTENDED, PROTOCOL, UPDPATH,
# INITFAKE, FAKEUPDATE) must come from that code or from
# /var/lib/getupdates/funcs.

################################################################################
# Client-info logging setup
################################################################################
# NOTE(review): the opening lines of this section were lost. The nesting below
# follows the surviving "else" / "elif" / "fi" tokens and their comments; the
# three tests marked RECONSTRUCTED are assumptions to be confirmed against the
# original.
if [ -n "${STATBASE:-}" ] && [ -n "${NFSSTATMNT:-}" ]; then  # RECONSTRUCTED
    if [ ! -d $STATBASE ]; then                              # RECONSTRUCTED
        if ! mkdir $STATBASE 2>&1; then                      # RECONSTRUCTED
            # If we make this a warn, we get a mail to know about it, but
            # it can be spammy but with log, it only goes in the local logs
            # and never gets reported since we're failing to mount the
            # clientinfo log dir :)
            # (actually we will notice that the machine is "down" after 15 days
            # of its logs not reaching us anymore)
            log "LOGWARNING: Can't create $STATBASE, autofs is probably running but failing to automount"
        else
            # ok, the dir is there, so we try to mount it then
            mount -o nolock $NFSSTATMNT $STATBASE || \
                log "LOGWARNING: Could not mount $NFSSTATMNT"
        fi
    # else dir is there, but make sure it has clientinfo data
    elif [ ! -e $STATBASE/mac ]; then
        mount -o nolock $NFSSTATMNT $STATBASE || \
            log "LOGWARNING: Could not mount $NFSSTATMNT"
    fi

    if [ -e $STATBASE/mac ]; then
        # Do not exit if we get any NFS write error
        trap '' ERR

        # This might already exist, or mkdir might fail
        test -d $STATBASE/mac/$MAC || mkdir $STATBASE/mac/$MAC || true
        # Directory group is going to be changed from nfsnobody to ops
        # by a cronjob on stan, so it ends up being unreadable for
        # us. We want that.
        chmod 350 $STATBASE/mac/$MAC || true

        # This might fail if nfs is not working for some reason
        if cd $STATBASE/mac/$MAC; then
            # Snapshot of the machine's identity and state, one file per item
            echo $IP > ip
            echo $HOSTNAME > hostname
            echo $TARGET > target
            echo $SNAP > snapshot
            cat /etc/sysconfig/getupdates > getupdates.conf
            ifconfig -a > ifconfig-a
            route -n > route-n
            cat /etc/ssh/ssh_host_{,dsa_,rsa_}key.pub > ssh_hostkeys.pub 2>/dev/null
            # don't hang in case we have a bad NFS mount
            df > df 2>/dev/null &
            mount > mount 2>/dev/null &
            (date; echo "--------"; who | egrep ' (tty|:0)'; echo ) >> who
            (date; echo "--------"; w -f; echo ) >> w-f
            # First user logged in on a tty or on display :0
            OWNER=$(who | egrep ' (tty|:0)' | head -1 | sed "s/ .*//")
            [ ! -z "$OWNER" ] && echo $OWNER > owner
            uname -a > uname-a
            free > free
            uptime > uptime
            cat /proc/cpuinfo > cpuinfo
            # user mode linux doesn't have /proc/bus/pci
            lspci 2>/dev/null > lspci
            ps auxww > ps-auxww
            netstat -n > netstat-n
            netstat -nlp > netstat-nlp
            rpm -qa 2>/dev/null | sort > rpm-qa 2>/dev/null
            #rpm -qa --queryformat="%{NAME} %{VERSION} %{RELEASE} %{EPOCH}\n" | sort > rpm-qa.split 2>/dev/null
            #rpm -qa --queryformat="\n"| sort > rpm-qa.xml 2>/dev/null
            COLUMNS=300 dpkg -l > dpkg-l 2>/dev/null
            if [ -f /etc/sysconfig/rhn/up2date-uuid ]; then
                grep ^rhnuuid /etc/sysconfig/rhn/up2date-uuid > uuid
            fi

            # From here on log() and die() write to the client-info directory
            LOG=$STATBASE/mac/$MAC/log.update
            LOGSAVE=$STATBASE/mac/$MAC/log.last
            FULLLOG=$STATBASE/mac/$MAC/log

            # A non-empty per-run log means the previous run never archived it
            if [ -s $LOG ]; then
                DATE=$(date "+%Y/%m/%d %H:%M:%S ($$)")
                log "$DATE LOGWARNING: File didn't get appended to log at last run"
                cat $LOG >> $FULLLOG
                /bin/mv $LOGSAVE{,.0} &>/dev/null || true
                /bin/mv $LOG $LOGSAVE
            fi

            DATE=$(date "+%Y/%m/%d %H:%M:%S ($$)")
            echo -e "\n\n$DATE $NEWINSTALL: $IP/$HOSTNAME\n--------------------------------------------------------------------------------" >> $LOG
        fi

        cd $STATBASE
        # We need the cd's because ln stats from the current directory, hanging
        # while people try to mount /auto/mac
        cd ip
        ln -snf ../mac/$MAC $IP
        cd ../hostname
        ln -snf ../mac/$MAC $HOSTNAME
        # We don't want to stay in /auto/clientinfo, it's unreadable
        cd $WORKDIR
        trap 'die "error line $LINENO with status $?"' ERR
    fi

    # Hopefully we won't have anything on stderr if this script isn't buggy,
    # but should it happen...
    if [ -z "$ATTENDED" -a "$LOG" != /dev/null ]; then
        exec 2>>$LOG
    fi
fi

################################################################################
# Actual getupdates starts here
################################################################################
# Now that we have logging to clientinfo on hopefully, we can do work

[ $EUID -ne 0 ] && die "$0 needs to run as root"

if [ -z "${TARGET:-}" ]; then
    die "Fatal: Can't work without target, couldn't read from /etc/sysconfig/getupdates. You should type something like echo \"TARGET=wksgold\" > /etc/sysconfig/getupdates"
fi

if [ "$PROTOCOL" != nfs -a "$PROTOCOL" != http ]; then
    log "LOGWARNING: Don't know what to do with protocol $PROTOCOL, defaulting back to http"
    PROTOCOL=http
fi

log "Syncing against target $UPDPATH via $PROTOCOL"

# We go to the tmp dir and stay there for the duration of the changeset
cd $WORKDIR/tmp

getfile "list" || \
    die "Could not retreive master list file from \"$UPDPATH\" via $PROTOCOL"

OLDIFS=$IFS
# NOTE(review): this copy shows a single space between the quotes; the original
# may have been a literal newline that was flattened -- confirm.
IFS=" "
# There is a weird shell bug if we parse the file as we go along, so we'll
# slurp the file first and then replay the tag values
TAGS=""
while read line
do
    # Skip comments
    echo $line | grep -q "^#" && continue
    TAGS="$TAGS $line"
done < list
IFS=$OLDIFS
/bin/rm list

# We'll be in $WORKDIR/tmp during the entire duration of this loop.
for UPDNAME in $TAGS
do
    # We keep state of the runmes so that we can create a diff since last time
    # we ran a given one (fakeupdate mode). The first time, is a diff from empty
    SAVEDRUNME=$WORKDIR/state/runme_$UPDNAME
    test -f $SAVEDRUNME || touch $SAVEDRUNME

    # Allows admin to copy all the runmes once so that we can get a diff next
    # time they get updated
    if [ "$INITFAKE" = "yes" ]; then
        getfile --quiet "$UPDNAME/runme" || \
            die "Error, can't process $UPDNAME. Couldn't access $UPDPATH/runme"
        # We want this for http, but not nfs, since it's a symlink to a RO FS
        chmod 755 runme &>/dev/null || true
        cp runme $SAVEDRUNME
    fi

    # $WORKDIR/state/$UPDNAME holds the epoch time of the last successful run;
    # re-run only when the server-side updatedate is newer than that
    if [ -f "$WORKDIR/state/$UPDNAME" ]; then
        if getfile --nowarn --quiet "$UPDNAME/updatedate"; then
            LASTRUN=$(cat $WORKDIR/state/$UPDNAME)
            if [ -z "${LASTRUN:-}" ]; then
                LASTRUN=0
            fi
            UPDDATE=$(cat updatedate)
            LASTRUNS=$(epocdate2str $LASTRUN)
            UPDDATES=$(epocdate2str $UPDDATE)
            if [ "$LASTRUN" -lt "$UPDDATE" ]; then
                log "Running update $UPDNAME, last done at $LASTRUNS and updated at $UPDDATES"
            else
                log "Not running update $UPDNAME, already done at $LASTRUNS (update dates to $UPDDATES)"
                continue
            fi
        else
            log "Not running update $UPDNAME, already done"
            continue
        fi
    else
        log "Getting and running update $UPDNAME"
    fi

    if [ -d files ]; then
        rm -rf files
    fi

    getfile --quiet "$UPDNAME/runme" || \
        die "Error, can't process $UPDNAME. Couldn't access $UPDPATH/runme"
    # We want this for http, but not nfs, since it's a symlink to a RO FS
    chmod 755 runme &>/dev/null || true

    # Not getting this isn't a fatal error, it may not be required by
    # runme. Note that getfile will delete the previous file that could be
    # on disk
    # Our make system however guarantees that it will be there (even if empty)
    # but no need to depend on that here
    getfile --nowarn "$UPDNAME/workfiles.tar.bz2" && tar xjf workfiles.tar.bz2

    # Now, we optionally output the changes instead of running them
    if [ "$FAKEUPDATE" = "yes" ]; then
        log "FAKEUPDATE: diffing runme for $UPDNAME"
        diff -u $SAVEDRUNME runme || true  # diff returns non 0 if files are !=

        # ok, now we're going into ugly land, but it's better than nothing
        log "FAKEUPDATE: emulating apt-get commands in $UPDNAME (if any)"
        grep -q "apt-get update" runme && apt-get update >/dev/null
        grep -q 'apt-get-force upgrade' runme && apt-get -qq --dry-run upgrade || true
        # this is completely unaware of if statements, and may make other errors
        # but it beats nothing, doesn't it?
        # [[:space:]] rather than a bare space so that tab-indented commands
        # are picked up as well
        egrep '^[[:space:]]*apt-get(|-force) install' runme | \
            sed -e "s/apt-get-force/apt-get/" \
                -e "s/install/install -qq --dry-run/" > $WORKDIR/tmp/apt-get-torun
        bash $WORKDIR/tmp/apt-get-torun

        log "FAKEUPDATE: showing new/updated files for $UPDNAME (if any)"
        for file in $(find files -type f | sed "s/^files\///")
        do
            # Allow ignoring files like /etc/init.d/registerhost, which
            # show up in installfile, but get deleted on the running system
            if grep -qs "^/$file" $WORKDIR/state/fakeupdate_ignorefiles; then
                continue
            fi

            if [ -f /$file ]; then
                test -z "${file:-}" || diff -u /$file files/$file || true
            else
                log "FAKEUPDATE: $file is a new file"
                test -z "${file:-}" || diff -u /dev/null files/$file || true
            fi
        done
    else
        # In the errant case where somethings _quietly_ going wrong, sometimes
        # it helps to see what's going on...
        DEBUG=${DEBUG:-no}
        if [ "$DEBUG" = "yes" ]; then
            RUNMECMD="bash -ex ./runme"
        else
            RUNMECMD="./runme"
        fi

        # We prefix stderr output for grepping in the logs, unless the said
        # output is known and prefixed with ok-> (in which case we strip that)
        # Yeah, we need to do the weird 3>&2 2>&1 1>&3 rotation to tag stderr
        # with a prefix (we swap stdout and stderr)
        # runme's exit status is saved to a file because the pipeline's own
        # status is that of tee. $RUNMECMD is left unquoted on purpose: it
        # must split into command and arguments.
        ( ( $RUNMECMD $UPDNAME 3>&2 2>&1 1>&3 ; echo $? >$WORKDIR/tmp/err ) | \
            sed -e "s/^/stderr->/" \
                -e "s/^stderr->ok->//" \
                -e "s/^stderr->/${ATTENDED}stderr->/" ) 2>&1 | tee -a $LOG

        errlev=$(cat $WORKDIR/tmp/err)
        if [ "$errlev" != 0 ]; then
            die "$UPDNAME/runme failed to run properly (error $errlev), aborting..."
        fi

        if [ $(find files -type f | wc -l) -gt 0 ]; then
            # log already prints the message and appends it to $LOG, so it is
            # not piped through tee (that would record the entry twice)
            log "FILEWARN: files left behind after running $UPDNAME"
            find files -type f | sed "s/^files/FILEWARN: /" | tee -a $LOG
        fi

        cp runme $SAVEDRUNME
        # Record when this update last ran successfully (epoch seconds)
        date "+%s" > "$WORKDIR/state/$UPDNAME"
    fi

    log
    rm -rf files || true
done

# Archive this run: append it to the full log and keep it as the "last" log
cat $LOG >> $FULLLOG
if [ -f $LOGSAVE ]; then
    if [ -f $LOG ]; then
        /bin/mv $LOGSAVE{,.0} &>/dev/null || true
        # Just making really sure that LOG != /dev/null
        /bin/mv $LOG $LOGSAVE &>/dev/null || true
    fi
elif [ -f $LOG ]; then
    /bin/mv $LOG $LOGSAVE &>/dev/null || true
fi

rm $LOCK &>/dev/null || true