#!/bin/bash
# Scraper/gatherer script by Marc MERLIN
#
# Usage: pass the road identifier as the first argument (defaults to sr88).
# Identifiers listed by the original author:
# sr88 us50 i80
#
# Environment:
#   EMAIL  overrides the default address <road>@lists.merlins.org

export PATH=/usr/local/sbin:$PATH

set -o nounset

baseurl=http://www.dot.ca.gov/hq/roadinfo
road=${1:-sr88}
# Upper-cased road name, used in the mail subject.
ROAD=$(tr 'a-z' 'A-Z' <<< "$road")
EMAIL=${EMAIL:-$road@lists.merlins.org}
# DATE is filename friendly (no space); DATE2 is the human readable form.
DATE=$(date "+%Y:%m:%d-%H:%M")
DATE2=$(date "+%Y:%m:%d %H:%M")
SUBJ="Caltrans $ROAD update"
dir=/var/www/html/merlins/roadinfo/$road
# Without the work directory nothing below can succeed, so give up early.
mkdir -p "$dir/old" || exit 1
lock="$dir/$road.lock"

# Print the timestamp for an epoch value (seconds since 1970-01-01 UTC)
# in RFC 3339 format with seconds precision.
#   $1 - epoch seconds
epoch2date() {
    date -d "1970-01-01 $1 sec UTC" --rfc-3339=seconds
}

# shlock (from inn) does the right thing and grabs a lock for a dead process
# (it checks the PID in the lock file and if it's not there, it
# updates the PID with the value given to -p)
if ! shlock -p $$ -f "$lock"; then
    echo "$lock held, quitting" >&2
    # Bare exit: the status is that of the echo above.
    exit
fi

# Everything below uses relative paths and deletes files, so refuse to
# continue from the wrong directory.
cd "$dir" || {
    echo "cannot cd to $dir, quitting" >&2
    exit 1
}

# Why pipefail is needed: when the headers contain no Last-Modified line
# (first transcript, road us5) grep fails, and only with pipefail does the
# whole pipeline report that failure.
#gandalfthegrey:~$ set -o pipefail; wget -d http://www.dot.ca.gov/hq/roadinfo/us5 2>&1| grep Last-Modified: | sed "s/Last-Modified: //"
#gandalfthegrey:~$ echo $?
#1
#gandalfthegrey:~$ set -o pipefail; wget -d http://www.dot.ca.gov/hq/roadinfo/us50 2>&1| grep Last-Modified: | sed "s/Last-Modified: //"
#Sat, 19 Nov 2011 14:48:48 GMT
#gandalfthegrey:~$ echo $?
#0
set -o pipefail

rm -f "$road" # delete in case it was left over.
/var/local/scr/alarm 10 wget -d $baseurl/$road 2>&1| grep Last-Modified: | sed "s/Last-Modified: //" > lastmodified.$road || exit date -d "$(cat lastmodified.$road)" "+%s" > lastmodified.epoch.$road oldepoch=$(cat lastmodified.epoch.$road.old) epoch=$(cat lastmodified.epoch.$road) if [ "$epoch" -lt "$oldepoch" ]; then if [[ $(( $oldepoch - $epoch )) > 60 ]]; then echo "Last modified date of new page for $road: $(epoch2date $epoch) is older than last received page ($(epoch2date $oldepoch))" >&2 echo "Discarding download" >&2 cat $road >&2 fi rm $road exit fi mv lastmodified.$road lastmodified.$road.old mv lastmodified.epoch.$road lastmodified.epoch.$road.old # Concatenate lines tail -n +3 $road | tr '\012\015' '| ' | sed "s/| |/@/g" | tr '|@' ' \012' | sed "s/^ *//" > $road.joined grep -Ei '(SNOW|CLOSED|CHAINS|AVALANCHE|ALTERNATE ROUTE|BEING HELD|REOPENED)' < $road.joined | grep -v 'CONNECTOR' > $road.parsed.new rm $road diff -U 0 $road.parsed $road.parsed.new | grep '^[-+][^-+]' > $road.parsed.diff cp $road.parsed.diff old/$road.$DATE.diff if [ -s $road.parsed.diff ]; then # some 'CLOSED' messages do not have a 'AT XX:YY', so in order not to miss any, # closed will be registered twice in those cases. # # # some closed messages may not have a matching 'reopened' # IS CLOSED 2 MI EAST OF TWIN BRIDGES # IS CLOSED 1.5 MI WEST OF KYBURZ # IS CLOSED FROM TWIN BRIDGES TO MEYERS # IS CLOSED TO EASTBOUND TRAFFIC 3 MI EAST OF POLLOCK PINES # IS CLOSED TO WESTBOUND TRAFFIC AT 4.8 MI WEST OF STRAWBERRY # IS CLOSED FROM ECHO SUMMIT TO MEYERS # IS CLOSED TO EASTBOUND TRAFFIC FROM COLFAX (PLACER) TO THE NEVADA STATE LINE # -> CLOSED.*AT doesn't work. 
# REOPENED TO NORMAL TRAFFIC AT THE JCT OF SR 49 /IN JACKSON/ (AMADOR CO) AT 1725 HRS ON 10/14/11
# -> REOPENED may not always be 'IS REOPENED'
#
# traffic held doesn't have a no longer being held matching message
# TRAFFIC IS BEING HELD 1.5 MI WEST OF KYBURZ
# TRAFFIC IS NO LONGER BEING HELD FROM
# grep -v CONNECTOR| EACH | DETOUR.*AVAILABLE
#

#######################################
# Report how long ago a marker file was written, then delete the marker.
# A marker holds the epoch seconds at which a closure/hold was recorded.
# Arguments: $1 - marker file name
# Outputs:   " after <N>mn" (leading space, no newline) on stdout, or
#            nothing when the marker is missing or does not hold a number
# Returns:   0 when there is nothing to report, otherwise the status of rm
#######################################
elapsed_since_marker() {
    local marker=$1 since
    # No recorded start: say nothing rather than fail on the missing file
    # (nounset is active, so callers must always get a defined value back).
    [ -f "$marker" ] || return 0
    since=$(cat "$marker")
    # A corrupt marker would break the arithmetic below; skip it.
    [[ $since =~ ^[0-9]+$ ]] || return 0
    # 10# forces base 10 so a value with leading zeros is not read as octal.
    printf ' after %dmn' "$(( ( $(date +%s) - 10#$since ) / 60 ))"
    rm -- "$marker"
}

# Classify the change and extend the mail subject. Only the first matching
# branch runs. Patterns starting with '^\+' look at lines added to the page.

# 88 specific stuff
# NOTE(review): these branches are not conditioned on $road; presumably the
# Carson names only ever show up on the sr88 page -- confirm.
if grep -q '^\+.*IS CLOSED.*CARSON SPUR' "$road.parsed.diff"; then
    date +%s > spur_closed
    echo "$DATE2: Closed" >> spur.txt
    SUBJ="$SUBJ: Carson Spur CLOSED"
elif grep -q '^\+.*IS CLOSED.*CARSON PASS' "$road.parsed.diff"; then
    date +%s > pass_closed
    echo "$DATE2: Closed" >> pass.txt
    SUBJ="$SUBJ: Carson Pass CLOSED"
elif grep -q '^\+.*IS REOPENED.*CARSON SPUR' "$road.parsed.diff"; then
    diff=$(elapsed_since_marker spur_closed)
    echo "$DATE2: Reopened$diff" >> spur.txt
    SUBJ="$SUBJ: Carson Spur REOPENED$diff"
elif grep -q '^\+.*IS REOPENED.*CARSON PASS' "$road.parsed.diff"; then
    diff=$(elapsed_since_marker pass_closed)
    echo "$DATE2: Reopened$diff" >> pass.txt
    SUBJ="$SUBJ: Carson Pass REOPENED$diff"
# common to all roads
elif grep '^\+.*IS CLOSED ' "$road.parsed.diff" | grep -qEv '( EACH | DETOUR.*AVAILABLE| TRUCKS )'; then
    date +%s > "${road}_closed"
    echo "$DATE2: Closed" >> "$road.txt"
    SUBJ="$SUBJ: CLOSURE"
# NOTE(review): unlike the other patterns this one is not anchored to added
# ('+') lines, so a removed line mentioning REOPENED matches too -- confirm
# that this is intended.
elif grep -q 'REOPENED' "$road.parsed.diff"; then
    diff=$(elapsed_since_marker "${road}_closed")
    echo "$DATE2: Reopened$diff" >> "$road.txt"
    SUBJ="$SUBJ: REOPENED$diff"
elif grep '^\+.*IS BEING HELD ' "$road.parsed.diff" | grep -qv 'DETOUR.*AVAILABLE'; then
    date +%s > "${road}_held"
    echo "$DATE2: Traffic Held" >> "$road.txt"
    SUBJ="$SUBJ: Traffic HELD"
elif grep -q '^\+.*NO LONGER BEING HELD ' "$road.parsed.diff"; then
    diff=$(elapsed_since_marker "${road}_held")
    echo "$DATE2: Traffic No Longer Held$diff" >> "$road.txt"
    SUBJ="$SUBJ: Traffic no longer HELD$diff"
fi

# cat | Mail -e -s "$SUBJ" marc@merlins.org <