#!/bin/bash
#
# Basic software updater by Marc MERLIN (2003/04/17 - v1.0)
# Portions Copyright Google, Inc
# License: GPL v2
#*
# $Id: //depot/ops/corp/grhat/tree/usr/local/scripts/getupdates#78 $
#
# Purpose:
#   Fetches a master "list" of update names for the configured TARGET (over
#   NFS or HTTP), and for each update that has not been applied yet (or whose
#   "updatedate" is newer than the last local run) downloads its "runme"
#   script plus optional "workfiles.tar.bz2" and executes it. Output and
#   machine information are logged to a per-MAC directory under $STATBASE
#   when that directory is reachable.
#
# Usage: getupdates [--firstload] [--sleep] [--unattended] [--fakeupdate]
#                   [--initfake] [--apply] [--debug]
#   --firstload   tag the log entry as NEWINSTALL and run unattended
#   --sleep       sleep a random amount of time before starting
#   --unattended  do not mark stderr lines as "seen" by an operator
#   --fakeupdate  show what would change instead of applying updates
#   --initfake    save a copy of every runme so that later diffs are meaningful
#   --apply       force applying updates even if FAKEUPDATE=yes is configured
#   --debug       run each runme through "bash -ex"
#
# Configuration is read from /etc/sysconfig/getupdates (TARGET is mandatory;
# PROTOCOL, WORKDIR, CIPROTO and the variables defaulted below may be set).
#
# 2003-04-23 - read target from /proc/cmdline (from kickstart)
#            - chown unpacked files to root
# 2003-05-01 - Lots of new code to log output and errors via NFS
# 2003-05-02 - Added locking, random sleep, and other improvements
# 2003-05-08 - Lock after the sleep, look for updatedate and optionally re-run
#              already run update
# 2003-05-14 - Catch errors in function tools, rotated files are immutable
#              More data logged for each client
# 2003-05-22 - Added more info on machines (cpu/pci), set more log variables
#              to default values and deal with unexpected errors better
#              target dirs have been moved to grhat/targets to reduce clutter
#              Added --firstload to show machines being reloaded
# 2003-06-06 - Added initial http support
#              Added server probe code to exit early
#              Rewrote to be errexit/nounset compliant
# 2003-11-13 - Added trap for dies, and reworked variable passing from boot
#              kernel variables
# xxxx-xx-xx - many other changes recorded in p4
# 2004-01-09 - External release v1.0 / Cleaned up release for LCA 2004
# 2005-02-16 - Second outside release

# Exit if we use any unset variable (use ${VAR:-} to test for possibly unset var)
set -o nounset

export PATH="/usr/local/scripts:/usr/local/sbin:/usr/local/bin/:/usr/sbin:/usr/bin/:/sbin:/bin:$PATH"

LOCK=/var/lock/getupdates
# Set to "yes" once shlock succeeded, so that die() never removes a lock that
# belongs to another running instance.
LOCKHELD="no"
# Messages logged before the real log file is available are spooled here
LOGSPOOL=""
LOG=/dev/null
FULLLOG=/dev/null
LOGSAVE=/dev/null

# log [MESSAGE]
#   Prints a timestamped "LOG:" line on stdout and appends it to $LOG.
#   As long as $LOG is not a regular file (it starts out as /dev/null), the
#   messages are remembered in $LOGSPOOL and flushed to $LOG on the first call
#   made after $LOG became a regular file.
#   Without MESSAGE (or with an empty one), an empty line is written; this is
#   also how die() forces the spool to be flushed.
# Globals: LOG (read), LOGSPOOL (read/written), DATE (written)
log () {
    DATE=$(date "+%Y/%m/%d %H:%M:%S ($$)")

    # If we haven't opened the log file yet, remember log messages
    if [ ! -f "$LOG" ]; then
        if [ -n "${1:-}" ]; then
            LOGSPOOL="$LOGSPOOL\n$DATE LOG: $1"
            echo "$DATE LOG: $1"
        fi
        return
    elif [ -n "$LOGSPOOL" ]; then
        # The spool uses literal \n separators, hence echo -e. Quoted so that
        # the spooled text is neither re-split nor glob-expanded.
        echo -e "$LOGSPOOL" >> "$LOG"
        LOGSPOOL=""
    fi

    if [ -z "${1:-}" ]; then
        echo
        echo >> "$LOG"
    else
        echo "$DATE LOG: $1"
        echo "$DATE LOG: $1" >> "$LOG"
    fi
}

# NOTE(review): both values are parsed from the old net-tools "ifconfig eth0"
# output format and may end up empty on other setups -- confirm before relying
# on $MAC as a directory name further down.
MAC=$(ifconfig eth0 | head -1 | awk '{print $5}')
IP=$(ifconfig eth0 | head -2 | tail -1 | sed -e "s/[^:]*://" -e "s/ .*//") || true

# die [MESSAGE]
#   Logs MESSAGE and the machine IP/MAC, appends the current log to $FULLLOG,
#   rotates it, releases our lock (only if we own it) and exits with status 1.
#   If $LOG is a regular file the message goes to stdout (the comment below
#   explains: it is picked up from the NFS log), otherwise it goes to stderr
#   so that cron sends a mail.
#   When MESSAGE comes from the ERR trap ("error line N with status S"), the
#   script source around line N is dumped as well.
# Globals: LOG, FULLLOG, LOGSAVE, LOCK, LOCKHELD, IP, MAC (read), DATE (written)
die () {
    # don't loop on ERR
    trap '' ERR

    DATE=$(date "+%Y/%m/%d %H:%M:%S ($$)")
    # Log an empty line to trigger LOGSPOOL emptying should LOG have become
    # available
    log ""

    local msg=${1:-}
    local more_info="IP:${IP:-UNSET} MAC:${MAC:-UNSET}"
    local line

    if [ -n "${LOG:-}" ]; then
        # If we log to an NFS server, we'll pick up the error via cron
        if [ -f "$LOG" ]; then
            echo "$DATE DIE: $msg"
            echo "$DATE DIE: $more_info"
        # If not, let's generate a cron mail
        else
            echo "$DATE DIE: $msg" >&2
            echo "$DATE DIE: $more_info" >&2
        fi
        echo "$DATE DIE: $msg failed" >> "$LOG"
        echo "$DATE DIE: $more_info" >> "$LOG"

        cat "$LOG" >> "$FULLLOG"
        # Same rotation as at the end of a successful run. The -f test makes
        # really sure that LOG != /dev/null. Rotating even when $LOGSAVE does
        # not exist yet prevents the next run from appending this very log to
        # $FULLLOG a second time.
        if [ -f "$LOG" ]; then
            if [ -f "$LOGSAVE" ]; then
                /bin/mv "$LOGSAVE"{,.0} &>/dev/null || true
            fi
            /bin/mv "$LOG" "$LOGSAVE" &>/dev/null || true
        fi
    fi

    if echo "$msg" | grep -q 'error line .* with status'; then
        line=$(echo "$msg" | sed 's/.*error line \(.*\) with status.*/\1/')
        # prepending DIE allows for the log watcher to report this message
        # back in the hourly cron mail
        echo " DIE: Code dump:"
        case "$line" in
            ''|*[!0-9]*)
                # Not a plain line number, nothing sensible to dump
                ;;
            *)
                # nl -ba numbers every line, so the awk record number is the
                # source line number: print 3 lines of context on each side.
                nl -ba "$0" \
                    | awk -v n="$line" 'NR >= n - 3 && NR <= n + 3' \
                    | sed "s/^/ DIE: /"
                ;;
        esac
    fi

    # Only remove the lock if we are the ones holding it: die can be called
    # before shlock (bad argument, missing TARGET) and must not free the lock
    # of another running instance.
    if [ "${LOCKHELD:-no}" = "yes" ]; then
        rm "$LOCK" &>/dev/null || true
    fi
    exit 1
}

# Exit if any line returns an error
# Actually trap ERR seems to take over errexit, so no need to define it
#set -o errexit

# Trap errors for logging before we die (so that they can be picked up
# by the log checker)
trap 'die "error line $LINENO with status $?"' ERR

# Strip the last two components of the fully qualified name
HOSTNAME=$(hostname -f 2>/dev/null | sed 's/\.[^.]*\.[^.]*$//') || true
if [ -z "${HOSTNAME:-}" ]; then
    HOSTNAME=$(hostname 2>/dev/null) || true
    log "WARNING: hostname on $HOSTNAME/$IP is a non qualified hostname or is not correctly listed in /etc/hosts, please fix"
fi
if [ -z "${HOSTNAME:-}" ]; then
    HOSTNAME="unknown"
    log "WARNING: Couldn't get hostname on $IP, please fix"
fi

# Who we ping to decide if we're up or not
# The idea is not to generate stderr output (i.e. a cron mail) if we're still
# down
BASECHECKHOST=host.domain.tld

# this is what we base shortcuts on. Again, this is to save space in the kernel
# parameters, since they're limited to 255 chars
DOMAIN=sub.domain.tld

# We use short, sucky names because they can be fed from the kernel command
# line, and it has a limit of 255 chars

# TARGD is a common root added to NFSUPDPATH and HTTPUPDPATH
TARGD="/grhat/targets"

# Base access to update info over NFS (TARGD is appended)
# It is important to split this up and mount $NFSHOST:/$NFSUPDPATH and not
# $NFSHOST:/$NFSUPDPATH/$TARGD because some NFS implementations (like ontap
# nfs v4 virtual mounts) will not let you mount more than 1 level deep
NFSHOST="software.nfs"
NFSUPDPATH="/software/"

# Base access to update info over HTTP (TARGD is appended)
HTTPUPDPATH="http://apt/"

# Where we write output
STATBASE="/auto/clientinfo"
# This is mostly for install time status reporting (when /auto automount
# doesn't work)
NFSSTATMNT="clientinfo.nfs:/clientinfo"

# Do we show changes instead of applying them?
FAKEUPDATE="no"

if [ -f /etc/sysconfig/getupdates ]; then
    . /etc/sysconfig/getupdates
fi

if [ -z "${WORKDIR:-}" ]; then
    WORKDIR="/var/lib/grhat"
    # While this is a default, we write it to the config file anyway, because
    # it is read by changesets to know where they should write state and where
    # the grhat tmp dir is
    echo 'WORKDIR=/var/lib/grhat' >> /etc/sysconfig/getupdates
fi

# epocdate2str SECONDS
#   Prints SECONDS (since the epoch, UTC) as "YYYY/MM/DD HH:MM:SS".
#   The echo wrapper is deliberate: the function returns 0 (and prints an
#   empty line) even when date rejects its argument, so a garbage timestamp
#   does not fire the ERR trap in the caller.
epocdate2str () {
    echo "$(date -d "1970-01-01 $1 sec UTC" "+%Y/%m/%d %H:%M:%S")"
}

# pinghost HOST
#   Checks that HOST answers to fping.
#   $BASECHECKHOST is probed first: if even that host is unreachable, the
#   network is considered down and the script exits with status 1 without
#   writing to stderr (no cron mail).
# Returns: 0 if HOST answered or if fping is not installed, the fping exit
#          status otherwise.
pinghost () {
    local HOST=$1
    local FPING=/bin/fping
    local ret

    if [ ! -x "$FPING" ]; then
        log "$FPING absent, skipping fping check for $HOST"
        return 0
    fi

    # Try 10 times with 5 seconds in between
    if ! "$FPING" -B5.0 -r 10 "$BASECHECKHOST" 2>/dev/null; then
        # We don't die and output on stderr, we want to avoid the cron mail
        log "FATAL: networking seems to be down: can't reach $BASECHECKHOST"
        log "$("$FPING" -A -c 3 "$BASECHECKHOST" 2>&1)"
        exit 1
    fi

    # Try 10 times with 5 seconds in between
    ret=0
    "$FPING" -B5.0 -r 10 "$HOST" 2>/dev/null || ret=$?
    if [ "$ret" -ne 0 ]; then
        log "Can't fing $HOST, fping output is:"
        log "$("$FPING" -A -c 3 "$HOST" 2>&1)"
    fi
    return "$ret"
}

# getfile [--nowarn] [--quiet] FILE
#   Makes FILE (a path relative to $UPDPATH) available in $WORKDIR/tmp under
#   its base name: as a symlink for PROTOCOL=nfs, as a wget download for
#   PROTOCOL=http.
#   Getfile will give output on STDERR about files it can't receive and why,
#   unless you call it with --nowarn
#   --quiet will not show data about files retrieved
# Globals: PROTOCOL, UPDPATH, WORKDIR (read)
# Returns: 0 for success and 1 for file retrieval failure. Dies if the
#          protocol is unknown or if the NFS tree is missing.
getfile () {
    # (Re)defined on every call, since --nowarn replaces it with a no-op
    warn () {
        echo "$*" >&2
    }
    if [ "${1:-}" = --nowarn ]; then
        shift
        warn () {
            :
        }
    fi

    local QUIET=""
    if [ "${1:-}" = --quiet ]; then
        QUIET=-q
        shift
    fi

    local DESTFILE
    DESTFILE=$(basename "$1")
    local rc

    if [ "$PROTOCOL" = nfs ]; then
        if [ -d "$UPDPATH" ]; then
            if [ ! -f "$UPDPATH/$1" ]; then
                warn "$UPDPATH/$1 not there"
                return 1
            fi
            ln -snf "$UPDPATH/$1" "$WORKDIR/tmp/"
        else
            die "Protocol NFS configured, but $UPDPATH not found"
        fi
    elif [ "$PROTOCOL" = http ]; then
        pushd "$WORKDIR/tmp/" >/dev/null || die "Can't cd to $WORKDIR/tmp"
        # wget would otherwise save the new copy as FILE.1
        /bin/rm "$DESTFILE" &>/dev/null || true
        rc=0
        # $QUIET is left unquoted on purpose: it must vanish when empty
        wget $QUIET "$UPDPATH/$1" 2>&1 || rc=$?
        if [ "$rc" -ne 0 ]; then
            warn "wget $UPDPATH/$1 failed ($rc)"
            popd >/dev/null
            return 1
        fi
        popd >/dev/null
    else
        die "Unknown protocol $PROTOCOL"
    fi
    return 0
}

# Whether there is someone behind the keyboard (in which case we munge stderr
# so that it doesn't get sent back up to the getupdate mail watchers)
ATTENDED="seen|"
NEWINSTALL="NEWENTRY"
INITFAKE="no"

# Yes, I could use getopt...
while [ -n "${1:-}" ]
do
    case "$1" in
    --firstload)
        NEWINSTALL="NEWINSTALL"
        ATTENDED=""
        shift
        ;;
    --sleep)
        SLEEP=1
        shift
        ;;
    --unattended)
        ATTENDED=""
        shift
        ;;
    --fakeupdate)
        # $FAKEUPDATE=yes means that we don't apply updates, you usually set
        # this in /etc/sysconfig/getupdates, but you can force it from the
        # command line
        FAKEUPDATE="yes"
        shift
        ;;
    --initfake)
        # Allows admin to make copy of all runmes so that we can give a
        # meaningful diff next time we see them
        INITFAKE="yes"
        shift
        ;;
    --apply)
        # $FAKEUPDATE=yes means that we don't apply updates, but only
        # show diffs, but --apply is an override to ask to apply them.
        FAKEUPDATE="no"
        shift
        ;;
    --debug)
        # Performs a bash -ex on all runmes, useful for determining where
        # silent getupdates failures happen
        DEBUG="yes"
        shift
        ;;
    *)
        die "What is this \"$1\" argument?"
        ;;
    esac
done

if [ -z "${TARGET:-}" ]; then
    die "Fatal: Can't work without target, couldn't read from /etc/sysconfig/getupdates. You should type something like echo \"TARGET=wksgold\" > /etc/sysconfig/getupdates"
fi

# PROTOCOL may legitimately be missing from the config file: ${PROTOCOL:-}
# keeps nounset from aborting before we get a chance to apply the default.
if [ "${PROTOCOL:-}" != nfs ] && [ "${PROTOCOL:-}" != http ]; then
    log "LOGWARNING: Don't know what to do with protocol ${PROTOCOL:-}, defaulting back to http"
    PROTOCOL=http
fi

HTTPHOST=$(echo "$HTTPUPDPATH" | sed 's/http:\/\/\([^/]*\)\/.*/\1/')
NFSSTATUSHOST=$(echo "$NFSSTATMNT" | sed 's/:.*//')

if [ -n "${SLEEP:-}" ]; then
    # This can sleep up to 20mn or so ($RANDOM is at most 32767)
    SLEEP=$(( RANDOM / 30 ))
    # Yes, this log only goes to STDOUT and /dev/null, I know :)
    log "Sleeping $SLEEP before starting"
    # Sometimes sleep doesn't succeed or is killed (rare, but I've seen it happen)
    sleep "$SLEEP" || true
fi

# shlock does the right thing and grabs a lock for a dead process
# (it checks the PID in the lock file and if it's not there, it
# updates the PID with the value given to -p)
if ! shlock -p $$ -f "$LOCK"; then
    PID="$(cat "$LOCK" 2>/dev/null)"
    # We're not calling die, because it would remove the lock file from the other process
    msg="Lock $LOCK held by running process $PID, quitting"
    DATE=$(date "+%Y/%m/%d %H:%M:%S ($$)")
    # To stdout to go to /var/log/getupdates or stdout if run from command line
    echo "$DATE DIE: $msg"
    exit 1
fi
# From now on die() is allowed to remove the lock
LOCKHELD="yes"

if [ -n "${NFSSTATUSHOST:-}" ] && [ "${CIPROTO:-}" != "none" ]; then
    # check if clientinfo dir is not there (like at install time)
    if [ ! -d "$STATBASE" ]; then
        # and if so, try to create it
        if ! mkdir -p "$STATBASE" 2>&1; then
            # If we make this a warn, we get a mail to know about it, but
            # it can be spammy. With log, it only goes in the local logs
            # and never gets reported since we're failing to mount the
            # clientinfo log dir :)
            # (actually we will notice that the machine is "down" after 15 days
            # of its logs not reaching us anymore)
            log "LOGWARNING: Can't create $STATBASE, autofs is probably running but failing to automount"
        else
            # ok, the dir is there, so we try to mount it then
            mount -o nolock "$NFSSTATMNT" "$STATBASE" || log "LOGWARNING: Could not mount $NFSSTATMNT"
        fi
    # else dir is there, but make sure it has clientinfo data
    elif [ ! -e "$STATBASE/mac" ]; then
        mount -o nolock "$NFSSTATMNT" "$STATBASE" || log "LOGWARNING: Could not mount $NFSSTATMNT"
    fi

    if [ -e "$STATBASE/mac" ]; then
        # Do not exit if we get any NFS write error
        trap '' ERR

        # NOTE(review): if $MAC is empty, everything below operates on
        # $STATBASE/mac itself -- confirm whether that can happen in practice.

        # This might already exist, or mkdir might fail
        test -d "$STATBASE/mac/$MAC" || mkdir "$STATBASE/mac/$MAC" || true
        # Directory group is going to be changed from nfsnobody to ops
        # by a cronjob on stan, so it ends up being unreadable for
        # us. We want that.
        chmod 350 "$STATBASE/mac/$MAC" || true

        # This might fail if nfs is not working for some reason
        if cd "$STATBASE/mac/$MAC"; then
            echo "$IP" > ip
            echo "$HOSTNAME" > hostname
            echo "$TARGET" > target
            ifconfig -a > ifconfig-a
            route -n > route-n
            cat /etc/ssh/ssh_host_{,dsa_,rsa_}key.pub > ssh_hostkeys.pub 2>/dev/null
            # don't hang in case we have a bad NFS mount
            df > df 2>/dev/null &
            mount > mount 2>/dev/null &
            (date; echo "--------"; who | grep -E ' (tty|:0)'; echo ) >> who
            OWNER=$(who | grep -E ' (tty|:0)' | head -1 | sed "s/ .*//")
            [ -n "$OWNER" ] && echo "$OWNER" > owner
            uname -a > uname-a
            free > free
            cat /proc/cpuinfo > cpuinfo
            # user mode linux doesn't have /proc/bus/pci
            lspci 2>/dev/null > lspci
            ps -auxww > ps-auxww
            rpm -qa | sort > rpm-qa
            rpm -qa --queryformat="%{NAME} %{VERSION} %{RELEASE} %{EPOCH}\n" | sort > rpm-qa.split
            # NOTE(review): this query format only emits a newline per
            # package; the original format string looks lost -- confirm.
            rpm -qa --queryformat="\n"| sort > rpm-qa.xml
            if [ -f /etc/sysconfig/rhn/up2date-uuid ]; then
                grep ^rhnuuid /etc/sysconfig/rhn/up2date-uuid > uuid
            fi

            LOG=$STATBASE/mac/$MAC/log.update
            LOGSAVE=$STATBASE/mac/$MAC/log.last
            FULLLOG=$STATBASE/mac/$MAC/log

            # A non empty log.update means the previous run never got to
            # rotate it: archive it now
            if [ -s "$LOG" ]; then
                DATE=$(date "+%Y/%m/%d %H:%M:%S ($$)")
                log "$DATE LOGWARNING: File didn't get appended to log at last run"
                cat "$LOG" >> "$FULLLOG"
                /bin/mv "$LOGSAVE"{,.0} &>/dev/null || true
                /bin/mv "$LOG" "$LOGSAVE"
            fi

            DATE=$(date "+%Y/%m/%d %H:%M:%S ($$)")
            echo -e "\n\n$DATE $NEWINSTALL: $IP/$HOSTNAME\n--------------------------------------------------------------------------------" >> "$LOG"
        fi

        # Think relative here: we want to link mac/$MAC to ip/$IP, and
        # relative from ip/ (or hostname/), it's ../mac/$MAC
        # We need the cd's because ln stats from the current directory, hanging while people try
        # to mount /auto/mac
        # Each link is made in a subshell and only if the cd worked, so that a
        # missing directory can't leave a stray link somewhere else.
        if cd "$STATBASE"; then
            ( cd ip && ln -snf "../mac/$MAC" "$IP" )
            ( cd hostname && ln -snf "../mac/$MAC" "$HOSTNAME" )
        fi

        # We don't want to stay in /auto/clientinfo, it's unreadable
        cd "$WORKDIR"
        trap 'die "error line $LINENO with status $?"' ERR
    fi

    # Hopefully we won't have anything on stderr if this script isn't buggy,
    # but should it happen...
    if [ -z "$ATTENDED" ] && [ "$LOG" != /dev/null ]; then
        exec 2>>"$LOG"
    fi
fi

if [ "$PROTOCOL" = nfs ]; then
    if ! pinghost "$NFSHOST"; then
        # Only generate stderr and a cron error mail if we aren't NFS logging
        if [ "$LOG" != /dev/null ]; then
            log "FATAL: PING: Can't ping $NFSHOST (was configured for NFS)"
            die
        else
            die "PING: Can't ping $NFSHOST (was configured for NFS)"
        fi
    fi
    UPDPATH="$NFSUPDPATH/$TARGD/$TARGET"
elif [ "$PROTOCOL" = http ]; then
    if ! pinghost "$HTTPHOST"; then
        # Only generate stderr and a cron error mail if we aren't NFS logging
        if [ "$LOG" != /dev/null ]; then
            log "FATAL: PING: Can't ping $HTTPHOST (was configured for HTTP)"
            die
        else
            die "PING: Can't ping $HTTPHOST (was configured for HTTP)"
        fi
    fi
    UPDPATH="$HTTPUPDPATH/$TARGD/$TARGET"
fi

log "Syncing against target $UPDPATH via $PROTOCOL"

if [ "$EUID" -ne 0 ]; then
    die "$0 needs to run as root"
fi

if [ ! -d "$WORKDIR/tmp" ]; then
    mkdir -p "$WORKDIR/tmp" || die "Can't mkdir $WORKDIR/tmp"
fi
if [ ! -d "$WORKDIR/work" ]; then
    mkdir -p "$WORKDIR/work" || die "Can't mkdir $WORKDIR/work"
fi
if [ ! -d "$WORKDIR/state" ]; then
    mkdir -p "$WORKDIR/state" || die "Can't mkdir $WORKDIR/state"
fi
if [ ! -f "$WORKDIR/state/changedfiles" ]; then
    touch "$WORKDIR/state/changedfiles" || die "Can't create $WORKDIR/state/changedfiles"
fi

cd "$WORKDIR"

getfile "list" || die "Could not retreive master list file from \"$UPDPATH\" via $PROTOCOL"
mv "$WORKDIR/tmp/list" .

# There is a weird shell interaction bug, so let's slurp the file first
# and then replay the tag values
TAGS=""
while read -r line
do
    # Skip comments (read already stripped the leading whitespace)
    case "$line" in
        '#'*) continue ;;
    esac
    TAGS="$TAGS $line"
done < "$WORKDIR/list"
/bin/rm "$WORKDIR/list"

cd "$WORKDIR/tmp"

# $TAGS is deliberately unquoted: it is a whitespace separated list of names
for UPDNAME in $TAGS
do
    SAVEDRUNME=$WORKDIR/state/runme_$UPDNAME
    test -f "$SAVEDRUNME" || touch "$SAVEDRUNME"

    # Allows admin to copy all the runmes once so that we can get a diff next
    # time they get updated
    if [ "$INITFAKE" = "yes" ]; then
        getfile --quiet "$UPDNAME/runme" || die "Error, can't process $UPDNAME. Couldn't access $UPDPATH/$UPDNAME/runme"
        # We want this for http, but not nfs, since it's a symlink to a RO FS
        chmod 755 runme &>/dev/null || true
        cp runme "$SAVEDRUNME"
    fi

    # $WORKDIR/state/$UPDNAME holds the epoch time of the last successful run
    if [ -f "$WORKDIR/state/$UPDNAME" ]; then
        if getfile --nowarn --quiet "$UPDNAME/updatedate"; then
            LASTRUN=$(cat "$WORKDIR/state/$UPDNAME")
            if [ -z "${LASTRUN:-}" ]; then
                LASTRUN=0
            fi
            UPDDATE=$(cat updatedate)
            LASTRUNS=$(epocdate2str "$LASTRUN")
            UPDDATES=$(epocdate2str "$UPDDATE")
            if [ "$LASTRUN" -lt "$UPDDATE" ]; then
                log "Running update $UPDNAME, last done at $LASTRUNS and updated at $UPDDATES"
            else
                log "Not running update $UPDNAME, already done at $LASTRUNS (update dates to $UPDDATES)"
                continue
            fi
        else
            log "Not running update $UPDNAME, already done"
            continue
        fi
    else
        log "Getting and running update $UPDNAME"
    fi

    if [ -d files ]; then
        rm -rf files
    fi

    getfile --quiet "$UPDNAME/runme" || die "Error, can't process $UPDNAME. Couldn't access $UPDPATH/$UPDNAME/runme"
    # We want this for http, but not nfs, since it's a symlink to a RO FS
    chmod 755 runme &>/dev/null || true

    # Not getting this isn't a fatal error, it may not be required by
    # runme. Note that getfile will delete the previous file that could be
    # on disk
    # Our make system however guarantees that it will be there (even if empty)
    # but no need to depend on that here
    getfile --nowarn "$UPDNAME/workfiles.tar.bz2" && tar xjf workfiles.tar.bz2

    # Now, we optionally output the changes instead of running them
    if [ "$FAKEUPDATE" = "yes" ]; then
        log "FAKEUPDATE: diffing runme for $UPDNAME"
        diff -u "$SAVEDRUNME" runme || true  # diff returns non 0 if files are !=

        # ok, now we're going into ugly land, but it's better than nothing
        log "FAKEUPDATE: emulating apt-get commands in $UPDNAME (if any)"
        grep -q "apt-get update" runme && apt-get update >/dev/null
        grep -q 'apt-get-force upgrade' runme && apt-get -qq --dry-run upgrade
        # this is completely unaware of if statements, and may make other errors
        # but it beats nothing, doesn't it?
        # [[:space:]] so that tab indented install lines are picked up too
        grep -E '^[[:space:]]*apt-get(|-force) install' runme \
            | sed -e "s/apt-get-force/apt-get/" -e "s/install/install -qq --dry-run/" \
            > "$WORKDIR/tmp/apt-get-torun"
        bash "$WORKDIR/tmp/apt-get-torun"

        log "FAKEUPDATE: showing new/updated files for $UPDNAME (if any)"
        # files/ only exists if the tarball was there and shipped it
        if [ -d files ]; then
            # NUL delimited so that file names containing whitespace survive;
            # process substitution keeps the loop (and log) in this shell.
            while IFS= read -r -d '' file
            do
                file=${file#files/}
                [ -n "$file" ] || continue
                # Allow ignoring files like /etc/init.d/registerhost, which
                # show up in installfile, but get deleted on the running system
                if grep -qs "^/$file" "$WORKDIR/state/fakeupdate_ignorefiles"; then
                    continue
                fi
                if [ -f "/$file" ]; then
                    diff -u "/$file" "files/$file" || true
                else
                    log "FAKEUPDATE: $file is a new file"
                    diff -u /dev/null "files/$file" || true
                fi
            done < <(find files -type f -print0)
        fi
    else
        # In the errant case where somethings _quietly_ going wrong, sometimes
        # it helps to see what's going on...
        DEBUG=${DEBUG:-no}
        if [ "$DEBUG" = "yes" ] ; then
            RUNMECMD="bash -ex ./runme"
        else
            RUNMECMD="./runme"
        fi

        # We prefix stderr output for grepping in the logs, unless the said
        # output is known and prefixed with ok-> (in which case we strip that)
        # The 3>&2 2>&1 1>&3 dance swaps stdout and stderr so that only stderr
        # goes through sed; the exit status is passed back through a file
        # because it would otherwise be lost in the pipeline.
        # $RUNMECMD is deliberately unquoted (command plus arguments).
        ( ( $RUNMECMD "$UPDNAME" 3>&2 2>&1 1>&3 ; echo $? > "$WORKDIR/tmp/err" ) \
            | sed -e "s/^/stderr->/" -e "s/^stderr->ok->//" -e "s/^stderr->/${ATTENDED}stderr->/" ) 2>&1 \
            | tee -a "$LOG"
        errlev=$(cat "$WORKDIR/tmp/err")
        if [ "$errlev" != 0 ]; then
            die "$UPDNAME/runme failed to run properly (error $errlev), aborting..."
        fi

        if [ -d files ] && [ "$(find files -type f | wc -l)" -gt 0 ]; then
            # log already writes to both stdout and $LOG, no tee needed
            log "FILEWARN: files left behind after running $UPDNAME"
            find files -type f | sed "s/^files/FILEWARN: /" | tee -a "$LOG"
        fi

        cp runme "$SAVEDRUNME"
        date "+%s" > "$WORKDIR/state/$UPDNAME"
    fi
    log
    rm -rf files || true
done

# Archive this run's log and keep it around as log.last
cat "$LOG" >> "$FULLLOG"
# Just making really sure that LOG != /dev/null
if [ -f "$LOG" ]; then
    if [ -f "$LOGSAVE" ]; then
        /bin/mv "$LOGSAVE"{,.0} &>/dev/null || true
    fi
    /bin/mv "$LOG" "$LOGSAVE" &>/dev/null || true
fi