Révision 17f78427
Whitespace cleanup
- remove trailing whitespace
- remove empty lines at the end of files
| plugins/google/google-rank | ||
|---|---|---|
| 2 | 2 |
# Simple munin plugin to find the google rank for a URL/WORD combination |
| 3 | 3 |
# |
| 4 | 4 |
# THIS SCRIPT BREAKS THE TOS OF GOOGLE SO USE WITH CARE AND DON'T BLAME ME IF THINGS GO WRONG |
| 5 |
#
|
|
| 5 |
# |
|
| 6 | 6 |
# (c) 2009 i.dobson@planet-ian.com |
| 7 | 7 |
# |
| 8 |
# For each url/words that you want to watch you need to create a variable/word pair in your
|
|
| 8 |
# For each url/words that you want to watch you need to create a variable/word pair in your |
|
| 9 | 9 |
# munin-node configuration file for example |
| 10 | 10 |
# |
| 11 | 11 |
#[google_rank] |
| ... | ... | |
| 19 | 19 |
# Version 0.5 24.1.2009 |
| 20 | 20 |
# Added loop to check the first 500 pages. Note the script sleeps 5 seconds between each page grab so |
| 21 | 21 |
# If the word/url you're looking for is in the higher positions then you need to increase the timeout |
| 22 |
#
|
|
| 22 |
# |
|
| 23 | 23 |
# Version 0.5 21.1.2009 |
| 24 |
# Dump each page grabbed from google into separate files (helps with debugging)
|
|
| 24 |
# Dump each page grabbed from google into separate files (helps with debugging) |
|
| 25 | 25 |
# |
| 26 | 26 |
# Version 0.4 19.1.2009 |
| 27 | 27 |
# Fixed corrupt then empty cache file bug |
| 28 | 28 |
# |
| 29 |
# Version 0.3 19.1.2009
|
|
| 29 |
# Version 0.3 19.1.2009 |
|
| 30 | 30 |
# The script now grabs the google page based on the LASTHIT counter. |
| 31 | 31 |
# The script grabs the google page for URL1, then the next time it's called URL2 etc. If the url/word pair doesn't exist for LASTHIT then the script just dumps the cached data |
| 32 | 32 |
# |
| 33 |
# Version 0.2 18.01.2009
|
|
| 33 |
# Version 0.2 18.01.2009 |
|
| 34 | 34 |
# Cache added, the script only grabs the pages from google every 10 calls |
| 35 | 35 |
# The script still only checks the first 100 pages returned by google |
| 36 | 36 |
# |
| ... | ... | |
| 70 | 70 |
fi |
| 71 | 71 |
if [ "$WORD" = "" ]; then |
| 72 | 72 |
exit 0 |
| 73 |
fi
|
|
| 73 |
fi |
|
| 74 | 74 |
VAR=`echo $URL.$WORD | sed -e "s/http:\/\///g"| sed -e "s/ /_/g"| sed -e "s/\./_/g"| sed -e "s/\-/_/g"` |
| 75 | 75 |
URL=`echo $URL| sed -e "s/http:\/\///g"` |
| 76 | 76 |
echo $VAR.label Pagerank $URL - $WORD |
| ... | ... | |
| 108 | 108 |
SEARCHWORD=`echo $WORD| sed -e "s/ /%20/g"` |
| 109 | 109 |
|
| 110 | 110 |
until [ "$FOUND" -ne "0" ]; do |
| 111 |
#Grab page from google for the WORD/PAGE combination.Pipe it into awk to pull out the url's only, one per line. Then dump only the lines containing the URL defined
|
|
| 111 |
#Grab page from google for the WORD/PAGE combination.Pipe it into awk to pull out the url's only, one per line. Then dump only the lines containing the URL defined |
|
| 112 | 112 |
wget -q --user-agent=Firefox -O - http://www.google.com/search?q=$SEARCHWORD\&num=100\&hl=en\&safe=off\&pwst=1\&start=$start\&sa=N > /tmp/google_rank.$LASTHIT.data |
| 113 | 113 |
VALUE=`cat /tmp/google_rank.$LASTHIT.data|sed 's/<a href=\"\([^\"]*\)\" class=l>/\n\1\n/g'|awk -v num=$num -v base=$base '{ if ( $1 ~ /^http/ ) print base,num++,$NF }'|awk '{ print $2 " " $3}'|grep -i $URL| awk '{ print $1}'`
|
| 114 | 114 |
VALUE=`echo $VALUE| awk '{ print $1}'`
|
| ... | ... | |
| 117 | 117 |
let start="start + 100" |
| 118 | 118 |
sleep 5 |
| 119 | 119 |
else |
| 120 |
FOUND=1
|
|
| 120 |
FOUND=1 |
|
| 121 | 121 |
let VALUE="$VALUE + $start" |
| 122 | 122 |
fi |
| 123 | 123 |
### echo Start=$start Value=$VALUE Found=$FOUND |
| ... | ... | |
| 139 | 139 |
|
| 140 | 140 |
#write data back |
| 141 | 141 |
rm /tmp/google_rank.cache |
| 142 |
for iLoop in `seq 1 10`; do
|
|
| 142 |
for iLoop in `seq 1 10`; do |
|
| 143 | 143 |
echo ${Data[$iLoop]} >> /tmp/google_rank.cache
|
| 144 | 144 |
done |
| 145 | 145 |
fi |
| 146 | 146 |
|
| 147 |
#Reset counter to start
|
|
| 147 |
#Reset counter to start |
|
| 148 | 148 |
if [ "$LASTHIT" -gt 30 ]; then |
| 149 | 149 |
echo 0 > /tmp/google_rank.status |
| 150 | 150 |
fi |
Formats disponibles : Unified diff