root/galaxy-central/cron/check_galaxy.sh

リビジョン 2, 6.5 KB (コミッタ: hatakeyama, 14 年 前)

import galaxy-central

  • 属性 svn:executable の設定値 *
行番号 
1#!/bin/sh
2#set -xv
3#
4# Runs the scripts/check_galaxy.py script in a way that's easy to handle from cron
5#
6
# Defaults.  The effective default for SLEEP is chosen later because it
# depends on whether debugging is enabled; it is cleared here so that a SLEEP
# variable inherited from the environment cannot silently override it.
DEBUG=0
STAGGER=0
INTERVAL=3
SLEEP=
MAIL=
PAGE=
NEWHIST=
# BADARG stays empty unless getopts reports an option it does not know.
BADARG=

# Parse the command line.  -m and -p may be given several times; each
# occurrence is appended to a space-separated list.
while getopts dsi:l:m:p:n optname
do
    case "$optname" in
        d)  DEBUG=1 ;;
        s)  STAGGER=1 ;;
        i)  INTERVAL=$OPTARG ;;
        l)  SLEEP=$OPTARG ;;
        m)  MAIL="$MAIL $OPTARG" ;;
        p)  PAGE="$PAGE $OPTARG" ;;
        n)  NEWHIST="-n" ;;
        *)  BADARG=1 ;;
    esac
done
# Discard the parsed options so that $1 is the Galaxy host.  Backticks and
# expr are used (not $((...))) to stay within the syntax this script already
# relies on.
shift `expr $OPTIND - 1`
30
# Print the usage text and exit with status 1 when no Galaxy host was given
# or an unknown option was seen.  BADARG is compared against 1 explicitly, so
# an empty, unset or "0" value does not trigger the usage message.
if [ -z "$1" ] || [ "$BADARG" = 1 ]; then
    cat <<EOF
usage: `basename "$0"` [-dsn] [-i interval] [-l seconds] [-m email_address]+ [-p pager_address]+ <galaxy_host>
  -d            Print debugging information.
  -s            Stagger mailing the pagers/emails, instead of all at once when
                there's a problem.  Useful for running check_galaxy at night.
  -i <interval> The number of times this wrapper should execute before mailing
                the next address, when staggering is enabled.  Mail is sent
                every <interval> runs of the program, so the actual time
                between emails is:
                  time = (<interval>) * (how often wrapper runs from cron)
  -l <seconds>  This wrapper runs check_galaxy a second time if the first check
                fails, in case the problem is intermittent.  <seconds> is how
                many seconds to sleep between checks.
  -m <address>  Email addresses to send the full check_galaxy output to, if
                Galaxy is down.  Use multiple -m options to specify multiple
                addresses.  When staggering, email will be sent in the order
                which you specify -m options on the command line.
  -p <address>  Like -m, but sends just the last line of check_galaxy's output.
                Useful for pagers.  When staggering is enabled and both -m and
                -p options are present, the first -m address and the first -p
                address are mailed simultaneously, followed by the second -m
                and second -p, and so on.
  -n            Create a new history (passes the -n option to check_galaxy.py).
  <galaxy_host> The hostname of the Galaxy server to check.  Use a : if running
                on a non-80 port (e.g. galaxy.example.com:8080).
EOF
    exit 1
fi
60
# Choose the pause between the first and second check when -l was not given:
# short while debugging, a full minute otherwise.
if [ -z "$SLEEP" ]; then
    # Compare against 1 explicitly: a bare "[ $DEBUG ]" only tests for a
    # non-empty string, which is also true for DEBUG=0.
    if [ "$DEBUG" = 1 ]; then
        SLEEP=2
    else
        SLEEP=60
    fi
fi
68
# globals
# Paths are resolved relative to the directory holding this script.  $0 is
# quoted so that an install path containing whitespace is handled correctly.
CRON_DIR=`dirname "$0"`
SCRIPTS_DIR="$CRON_DIR/../scripts"
CHECK_GALAXY="$SCRIPTS_DIR/check_galaxy.py"
# Per-user directory under which this wrapper keeps its state.
VAR="$HOME/.check_galaxy"
74
# sanity
# Without the checker script there is nothing to run.  The wrapper exits with
# status 0 and prints the reason only in debug mode (presumably to stay quiet
# when run from cron).  The path is quoted so that whitespace in it cannot
# turn the test into a syntax error and skip this check.
if [ ! -f "$CHECK_GALAXY" ]; then
    [ "$DEBUG" = 1 ] && echo "$CHECK_GALAXY is missing"
    exit 0
fi
80
# Pick a ps that understands BSD-style arguments.  SunOS is the only system
# special-cased here; everything else uses whatever "ps" is first in PATH.
PS="ps"
if [ "`uname -s`" = "SunOS" ]; then
    PS="/usr/ucb/ps"
fi
86
# Per-host state lives under $VAR/<galaxy_host>:
#   mail/, page/   one marker file per address that has been notified
#   wrap.mutex     pid of the wrapper currently running for this host
#   wrap.count     number of runs skipped because a wrapper was still running
#   wrap.stagger   run counter used when staggering notifications
NOTIFIED_MAIL="$VAR/$1/mail"
NOTIFIED_PAGE="$VAR/$1/page"
MUTEX="$VAR/$1/wrap.mutex"
COUNT="$VAR/$1/wrap.count"
STAGGER_FILE="$VAR/$1/wrap.stagger"
# Create any missing state directory, readable by the owner only.  Every path
# is quoted so a home directory containing whitespace works.  On failure the
# wrapper exits with status 0 and reports the problem only in debug mode.
for dir in "$VAR/$1" "$NOTIFIED_MAIL" "$NOTIFIED_PAGE"; do
    if [ ! -d "$dir" ]; then
        mkdir -p -m 0700 "$dir"
        if [ $? -ne 0 ]; then
            [ "$DEBUG" = 1 ] && echo "unable to create dir: $dir"
            exit 0
        fi
    fi
done
101
# The per-host "login" file holds the credentials used to log in to Galaxy.
# When it is absent, stop with status 0; instructions for creating it are
# shown only in debug mode.
[ -f "$VAR/$1/login" ] || {
    if [ $DEBUG = 1 ]; then
        cat <<EOF
Please create the file:
  $VAR/$1/login
This should contain a username and password to log in to
Galaxy with, on one line, separated by whitespace, e.g.:

check_galaxy@example.com password

If the user does not exist, check_galaxy will create it
for you.
EOF
    fi
    exit 0
}
116
# When staggering is enabled, load the number of runs since the last
# notification.  With no stagger file the counter starts at INTERVAL, which
# makes the first failed run send a notification right away.
if [ "$STAGGER" = 1 ]; then
    if [ -f "$STAGGER_FILE" ]; then
        STAGGER_COUNT=`cat "$STAGGER_FILE"`
    else
        STAGGER_COUNT=$INTERVAL
    fi
    # An empty or non-numeric counter (e.g. a truncated file) would make the
    # later numeric comparison and expr fail on every run, so no mail would
    # ever be sent; treat such a value as "notification due".
    case "$STAGGER_COUNT" in
        ''|*[!0-9]*) STAGGER_COUNT=$INTERVAL ;;
    esac
fi
124
# only run one at once
# The mutex file holds the pid of the wrapper that created it.  If that
# process is still alive this run is skipped and counted; when the counter
# reaches 3 a warning is mailed to the -m addresses.  If the process is gone
# the mutex is stale and is removed.
# NOTE: checking for the mutex and creating it are separate steps, so two
# wrappers started at the same instant could both proceed.
if [ -f "$MUTEX" ]; then
    pid=`cat "$MUTEX"`
    $PS p "$pid" >/dev/null 2>&1
    if [ $? -eq 0 ]; then
        if [ -f "$COUNT" ]; then
            count=`cat "$COUNT"`
        else
            count=0
        fi
        # A damaged counter file must not break the arithmetic below.
        case "$count" in
            ''|*[!0-9]*) count=0 ;;
        esac
        # Only mail when at least one -m address was given; mail without a
        # recipient has nobody to deliver to.  $MAIL is left unquoted on
        # purpose so that it splits into one argument per address.
        if [ "$count" -eq 3 ] && [ -n "$MAIL" ]; then
            echo "A check_galaxy process for $1 has been running for an unusually long time.  Something is broken." \
                | mail -s "$1 problems" $MAIL
        fi
        expr $count + 1 > "$COUNT"
        exit 0
    else
        # stale mutex
        rm -f "$MUTEX"
    fi
fi

# No other wrapper is running: reset the skip counter and take the mutex.
rm -f "$COUNT"
echo $$ > "$MUTEX"
149
# Mail every address that was told about the outage that the host is okay
# again, and remove its marker file so it can be notified of a later outage.
# Arguments: $1 - Galaxy host name
# Globals:   NOTIFIED_MAIL, NOTIFIED_PAGE, DEBUG (read)
notify_recovered() {
    for file in "$NOTIFIED_MAIL"/* "$NOTIFIED_PAGE"/*; do
        recip=`basename "$file"`
        # the literal string including the * will be passed if the dir is empty
        [ "$recip" = '*' ] && continue
        echo "$1 is now okay" | mail -s "$1 OK" "$recip"
        rm -f "$file"
        [ "$DEBUG" = 1 ] && echo "up: mailed $recip"
    done
}

# First check.  Its output is captured only to keep it off stdout; the exit
# status of the command substitution decides what happens next.  $NEWHIST is
# left unquoted so that it disappears entirely when empty.
[ "$DEBUG" = 1 ] && echo "running first check"
first_try=`"$CHECK_GALAXY" $NEWHIST "$1" 2>&1`

if [ $? -eq 0 ]; then
    # if successful
    [ "$DEBUG" = 1 ] && echo "first check succeeded"
    notify_recovered "$1"
    rm -f "$MUTEX" "$STAGGER_FILE"
    exit 0
fi

# if failure, wait and try again
[ "$DEBUG" = 1 ] && echo "first check failed, sleeping $SLEEP seconds for second run"
sleep "$SLEEP"

[ "$DEBUG" = 1 ] && echo "running second check"
second_try=`"$CHECK_GALAXY" $NEWHIST "$1" 2>&1`

if [ $? -ne 0 ]; then
    [ "$DEBUG" = 1 ] && echo "second check failed"
    if [ "$STAGGER" = 1 ]; then
        if [ "$STAGGER_COUNT" -eq "$INTERVAL" ]; then
            # send notification this run
            echo 1 > "$STAGGER_FILE"
        else
            # don't send notification this run
            [ "$DEBUG" = 1 ] && echo "$1 is down, but it's not time to send an email.  STAGGER_COUNT was $STAGGER_COUNT"
            expr $STAGGER_COUNT + 1 > "$STAGGER_FILE"
            rm -f "$MUTEX"
            exit 0
        fi
    fi
    # Full output goes to each -m address that has not been notified yet.
    # When staggering, only the first such address is mailed per run.
    for recip in $MAIL; do
        if [ ! -f "$NOTIFIED_MAIL/$recip" ]; then
            cat <<HERE | mail -s "$1 problems" "$recip"
$second_try
HERE
            touch "$NOTIFIED_MAIL/$recip"
            [ "$DEBUG" = 1 ] && echo "dn: mailed $recip"
            [ "$STAGGER" = 1 ] && break
        fi
    done
    # Pager addresses get only the last line of the output.
    for recip in $PAGE; do
        if [ ! -f "$NOTIFIED_PAGE/$recip" ]; then
            cat <<HERE | tail -1 | mail -s "$1 problems" "$recip"
$second_try
HERE
            touch "$NOTIFIED_PAGE/$recip"
            [ "$DEBUG" = 1 ] && echo "dn: mailed $recip"
            [ "$STAGGER" = 1 ] && break
        fi
    done
else
    [ "$DEBUG" = 1 ] && echo "second check succeeded"
    notify_recovered "$1"
    rm -f "$STAGGER_FILE"
fi

rm -f "$MUTEX"
exit 0
Note: リポジトリブラウザについてのヘルプは TracBrowser を参照してください。