Révision df3939be
Initial version
| plugins/other/google-rank | ||
|---|---|---|
| 1 |
#!/bin/bash |
|
| 2 |
# Simple munin plugin to find the google rank for a URL/WORD combination |
|
| 3 |
# |
|
| 4 |
# THIS SCRIPT BREAKS THE TOS OF GOOGLE SO USE WITH CARE AND DON'T BLAME ME IF THINGS GO WRONG |
|
| 5 |
# |
|
| 6 |
# (c) 2009 i.dobson@planet-ian.com |
|
| 7 |
# |
|
| 8 |
# For each url/words that you want to watch you need to create a variable/word pair in your |
|
| 9 |
# munin-node configuration file for example |
|
| 10 |
# |
|
| 11 |
#[google_rank] |
|
| 12 |
#user root |
|
| 13 |
#timeout 60 |
|
| 14 |
#env.URL1 http://www.plant-ian.com |
|
| 15 |
#env.WORD1 avr webcam |
|
| 16 |
#env.URL2 http://www.plant-ian.com |
|
| 17 |
#env.WORD2 bascom |
|
| 18 |
# |
|
| 19 |
# Version 0.5 24.1.2009 |
|
| 20 |
# Added loop to check the first 500 pages. Note the script sleeps 5 seconds between each page grab so |
|
| 21 |
# if the word/url you're looking for is in the higher positions then you need to increase the timeout |
|
| 22 |
# |
|
| 23 |
# Version 0.5 21.1.2009 |
|
| 24 |
# Dump each page grabbed from google into separate files (helps with debugging) |
|
| 25 |
# |
|
| 26 |
# Version 0.4 19.1.2009 |
|
| 27 |
# Fixed corrupt then empty cache file bug |
|
| 28 |
# |
|
| 29 |
# Version 0.3 19.1.2009 |
|
| 30 |
# The script now grabs the google page based on the LASTHIT counter. |
|
| 31 |
# The script grabs the google page for URL1, then the next time it's called URL2 etc. If the url/word pair doesn't exist for LASTHIT then the script just dumps the cached data |
|
| 32 |
# |
|
| 33 |
# Version 0.2 18.01.2009 |
|
| 34 |
# Cache added, the script only grabs the pages from google every 10 calls |
|
| 35 |
# The script still only checks the first 100 pages returned by google |
|
| 36 |
# |
|
| 37 |
# Version 0.1 17.01.2009 Initial release |
|
| 38 |
# The script only checks the first 100 pages returned by google |
|
| 39 |
# |
|
| 40 |
|
|
| 41 |
# Auto configure: check that the first URL/WORD pair is defined.
# Prints "yes" and exits 0 when both URL1 and WORD1 are non-empty,
# otherwise prints "no" and exits 1.
if [[ "$1" == "autoconf" ]]; then
  if [[ -n "$URL1" && -n "$WORD1" ]]; then
    echo yes
    exit 0
  fi
  echo no
  exit 1
fi
|
| 52 |
|
|
| 53 |
# Configure: print the graph header, then one "<field>.label" line for each
# consecutive URLn/WORDn pair (n = 1, 2, ...) found in the environment.
# The listing stops at the first index where either variable is empty.
if [[ "$1" == "config" ]]; then
  echo 'graph_title Google page rank'
  echo 'graph_args --upper-limit 100 -l 0'
  echo 'graph_category other'
  echo 'graph_scale no'
  echo 'graph_info Google page rank for URLs & Words'

  scheme='http://'
  for (( iLoop = 1; ; iLoop++ )); do
    TMPURL="URL${iLoop}"
    TMPWORD="WORD${iLoop}"
    URL="${!TMPURL}"
    WORD="${!TMPWORD}"

    # Trim and collapse whitespace by splitting into words and re-joining
    # with single spaces. Unlike an unquoted `echo $URL`, this never exposes
    # the value to pathname (glob) expansion.
    read -r -a parts <<< "$URL"
    URL="${parts[*]}"
    read -r -a parts <<< "$WORD"
    WORD="${parts[*]}"

    if [[ -z "$URL" || -z "$WORD" ]]; then
      break
    fi

    # Field name: "<url>.<word>" without "http://", and with every space,
    # dot and dash turned into an underscore.
    VAR="${URL}.${WORD}"
    VAR=${VAR//"$scheme"/}
    VAR=${VAR//[ .-]/_}

    # The label shows the URL without its scheme.
    URL=${URL//"$scheme"/}
    printf '%s.label Pagerank %s - %s\n' "$VAR" "$URL" "$WORD"
  done
  exit 0
fi
|
| 81 |
|
|
| 82 |
#Meat of the program, grabs data from google for one word/url pair using LASTHIT as the pointer to which url/word pair to read |
|
| 83 |
|
|
| 84 |
# Read, update and save the call counter.
# LASTHIT becomes (stored value + 1); a missing, empty or non-numeric status
# file counts as 0. The stored value is validated as plain digits before it
# is used in arithmetic: the status file lives in /tmp, and bash arithmetic
# would otherwise evaluate arbitrary expressions (including command
# substitutions inside array subscripts) found in it.
LASTHIT=0
if [[ -f /tmp/google_rank.status ]]; then
  # First whitespace-separated word of the first line only.
  read -r LASTHIT _ < /tmp/google_rank.status
  if [[ ! "$LASTHIT" =~ ^[0-9]+$ ]]; then
    LASTHIT=0
  fi
fi

# 10# forces base 10 so a value such as "08" is not rejected as bad octal.
LASTHIT=$(( 10#$LASTHIT + 1 ))
printf '%s\n' "$LASTHIT" > /tmp/google_rank.status
|
| 92 |
|
|
| 93 |
# Find the URL/WORD pair for the call counter and, if that pair exists,
# refresh its rank in the cache file.
#
# Reads:  LASTHIT (index of the pair to refresh), URL<LASTHIT>, WORD<LASTHIT>
# Writes: /tmp/google_rank.<LASTHIT>.data  last result page fetched (debug aid)
#         /tmp/google_rank.cache           line N holds "<field>.value <rank>"
#                                          for pair N
# The stored rank is the 1-based position of the first result whose link
# contains URL, or -1 when it is not found on any page that was checked.
TMPURL="URL${LASTHIT}"
TMPWORD="WORD${LASTHIT}"
URL="${!TMPURL}"
WORD="${!TMPWORD}"

# Trim and collapse whitespace by splitting into words and re-joining with
# single spaces; this never exposes the values to pathname (glob) expansion.
read -r -a gr_parts <<< "$URL"
URL="${gr_parts[*]}"
read -r -a gr_parts <<< "$WORD"
WORD="${gr_parts[*]}"

# Both halves of the pair are required, matching the "config" listing, which
# stops at the first pair with an empty URL or WORD.
if [[ -n "$URL" && -n "$WORD" ]]; then

  # Clean up URL/WORD pair: remove http://, replace " ", "." and "-" with "_"
  gr_scheme='http://'
  VAR="${URL}.${WORD}"
  VAR=${VAR//"$gr_scheme"/}
  VAR=${VAR//[ .-]/_}

  # Only spaces are escaped for the query string.
  SEARCHWORD=${WORD// /%20}

  gr_page="/tmp/google_rank.${LASTHIT}.data"
  FOUND=-1
  VALUE=-1

  # Walk the result pages (100 results each, start offsets 0..500), pausing
  # 5 seconds between requests, until the URL shows up.
  for (( start = 0; start <= 500; start += 100 )); do
    if (( start > 0 )); then
      sleep 5
    fi

    wget -q --user-agent=Firefox -O - \
      "http://www.google.com/search?q=${SEARCHWORD}&num=100&hl=en&safe=off&pwst=1&start=${start}&sa=N" \
      > "$gr_page"

    # Put every result link on a line of its own, number the lines that
    # start with "http", and print the number of the first one containing
    # URL (case-insensitive, literal substring match).
    gr_hit=$(sed 's/<a href="\([^"]*\)" class=l>/\n\1\n/g' "$gr_page" \
      | awk -v want="$URL" '
          BEGIN { want = tolower(want) }
          $1 ~ /^http/ {
            rank++
            if (index(tolower($NF), want) > 0) { print rank; exit }
          }')

    if [[ -n "$gr_hit" ]]; then
      FOUND=1
      VALUE=$(( gr_hit + start ))
      break
    fi
  done

  # Read through the cache file, saving line N to Data[N]. A missing cache
  # (first run) simply leaves the array empty.
  Data=()
  if [[ -r /tmp/google_rank.cache ]]; then
    iLoop=1
    while read -r line || [[ -n "$line" ]]; do
      Data[$iLoop]=$line
      iLoop=$(( iLoop + 1 ))
    done < /tmp/google_rank.cache
  fi

  # Replace one line with the new value grabbed from google
  Data[$LASTHIT]="$VAR.value $VALUE"

  # Write the data back: at least 10 lines as before, and more when a
  # higher slot is in use, so pairs numbered above 10 keep their value.
  gr_last=10
  for iLoop in "${!Data[@]}"; do
    if (( iLoop > gr_last )); then
      gr_last=$iLoop
    fi
  done
  for (( iLoop = 1; iLoop <= gr_last; iLoop++ )); do
    printf '%s\n' "${Data[iLoop]}"
  done > /tmp/google_rank.cache
fi
|
| 146 |
|
|
| 147 |
# Reset the stored counter to 0 once LASTHIT has gone past 30, so the next
# call starts again with pair 1. An empty or non-numeric LASTHIT is left
# alone instead of triggering an "integer expression expected" error.
if [[ "$LASTHIT" =~ ^[0-9]+$ ]] && (( 10#$LASTHIT > 30 )); then
  echo 0 > /tmp/google_rank.status
fi
|
| 151 |
|
|
| 152 |
# Dump data to munin: print every non-empty line of the cache file as-is.
# A cache that does not exist yet (nothing fetched so far) produces no
# output rather than a redirection error. Lines are printed with printf so
# their content is never subject to word splitting or glob expansion.
if [[ -r /tmp/google_rank.cache ]]; then
  while read -r line || [[ -n "$line" ]]; do
    if [[ -n "$line" ]]; then
      printf '%s\n' "$line"
    fi
  done < /tmp/google_rank.cache
fi
exit 0
|
| 159 |
|
|
Formats disponibles : Unified diff