root / plugins / google / google-rank @ 6ffd5019
Historique | Voir | Annoter | Télécharger (4,7 ko)
| 1 | df3939be | ian dobson | #!/bin/bash |
|---|---|---|---|
| 2 | # Simple munin plugin to find the google rank for a URL/WORD combination |
||
| 3 | # |
||
| 4 | # THIS SCRIPT BREAKS THE TOS OF GOOGLE SO USE WITH CARE AND DON'T BLAME ME IF THINGS GO WRONG |
||
| 5 | # |
||
| 6 | # (c) 2009 i.dobson@planet-ian.com |
||
| 7 | # |
||
| 8 | # For each URL/word combination that you want to watch you need to create a URLx/WORDx variable pair in your |
||
| 9 | # munin-node configuration file, for example: |
||
| 10 | # |
||
| 11 | #[google_rank] |
||
| 12 | #user root |
||
| 13 | #timeout 60 |
||
| 14 | #env.URL1 http://www.plant-ian.com |
||
| 15 | #env.WORD1 avr webcam |
||
| 16 | #env.URL2 http://www.plant-ian.com |
||
| 17 | #env.WORD2 bascom |
||
| 18 | # |
||
| 19 | # Version 0.5 24.1.2009 |
||
| 20 | # Added loop to check the first 500 pages. Note the script sleeps 5 seconds between each page grab, so |
||
| 21 | # if the word/URL you're looking for is in the higher positions then you need to increase the timeout |
||
| 22 | # |
||
| 23 | # Version 0.5 21.1.2009 |
||
| 24 | # Dump each page grabbed from google into separate files (helps with debugging) |
||
| 25 | # |
||
| 26 | # Version 0.4 19.1.2009 |
||
| 27 | # Fixed corrupt then empty cache file bug |
||
| 28 | # |
||
| 29 | # Version 0.3 19.1.2009 |
||
| 30 | # The script now grabs the google page based on the LASTHIT counter. |
||
| 31 | # The script grabs the google page for URL1, then the next time it's called URL2 etc. If the url/word pair doesn't exist for LASTHIT then the script just dumps the cached data |
||
| 32 | # |
||
| 33 | # Version 0.2 18.01.2009 |
||
| 34 | # Cache added, the script only grabs the pages from google every 10 calls |
||
| 35 | # The script still only checks the first 100 pages returned by google |
||
| 36 | # |
||
| 37 | # Version 0.1 17.01.2009 Initial release |
||
| 38 | # The script only checks the first 100 pages returned by google |
||
| 39 | # |
||
| 40 | |||
| 41 | # Auto configure: check that URL1 and WORD1 are defined |
||
| 42 | if [ "$1" = "autoconf" ]; then |
||
| 43 | if [ "$URL1" != "" ]; then |
||
| 44 | if [ "$WORD1" != "" ]; then |
||
| 45 | echo yes |
||
| 46 | exit 0 |
||
| 47 | fi |
||
| 48 | fi |
||
| 49 | echo no |
||
| 50 | exit 1 |
||
| 51 | fi |
||
| 52 | |||
| 53 | #Configure, loop through each variable defined WORDx URLx dumping it to munin |
||
| 54 | if [ "$1" = "config" ]; then |
||
| 55 | iLoop=1 |
||
| 56 | echo 'graph_title Google page rank' |
||
| 57 | echo 'graph_args --upper-limit 100 -l 0' |
||
| 58 | 6ffd5019 | Gabriele Pohl | echo 'graph_category google' |
| 59 | df3939be | ian dobson | echo 'graph_scale no' |
| 60 | echo 'graph_info Google page rank for URLs & Words' |
||
| 61 | |||
| 62 | URL="xxx" |
||
| 63 | until [ "$URL" = "" ]; do |
||
| 64 | TMPURL=URL$iLoop |
||
| 65 | URL="${!TMPURL}"
|
||
| 66 | TMPWORD=WORD$iLoop |
||
| 67 | WORD="${!TMPWORD}"
|
||
| 68 | if [ "$URL" = "" ]; then |
||
| 69 | exit 0 |
||
| 70 | fi |
||
| 71 | if [ "$WORD" = "" ]; then |
||
| 72 | exit 0 |
||
| 73 | fi |
||
| 74 | VAR=`echo $URL.$WORD | sed -e "s/http:\/\///g"| sed -e "s/ /_/g"| sed -e "s/\./_/g"| sed -e "s/\-/_/g"` |
||
| 75 | URL=`echo $URL| sed -e "s/http:\/\///g"` |
||
| 76 | echo $VAR.label Pagerank $URL - $WORD |
||
| 77 | let iLoop="$iLoop +1" |
||
| 78 | done |
||
| 79 | exit 0 |
||
| 80 | fi |
||
| 81 | |||
| 82 | #Meat of the program, grabs data from google for one word/url pair using LASTHIT as the pointer to which url/word pair to read |
||
| 83 | |||
| 84 | #Read update & save counter |
||
| 85 | LASTHIT=0 |
||
| 86 | if [ -f /tmp/google_rank.status ]; then |
||
| 87 | LASTHIT=`cat /tmp/google_rank.status | awk '{print $1}'`
|
||
| 88 | fi |
||
| 89 | |||
| 90 | let LASTHIT="$LASTHIT + 1" |
||
| 91 | echo $LASTHIT > /tmp/google_rank.status |
||
| 92 | |||
| 93 | #Find URL/WORD PAIR for loop counter |
||
| 94 | TMPURL=URL$LASTHIT |
||
| 95 | URL="${!TMPURL}"
|
||
| 96 | TMPWORD=WORD$LASTHIT |
||
| 97 | WORD="${!TMPWORD}"
|
||
| 98 | |||
| 99 | if [ "$URL" != "" ]; then |
||
| 100 | |||
| 101 | #Setup defaults |
||
| 102 | base=0 |
||
| 103 | num=1 |
||
| 104 | start=0 |
||
| 105 | FOUND=0 |
||
| 106 | #Clean up URL/WORD pair, removing http:// replacing " " with "_", "." with "_", "-" with "_" |
||
| 107 | VAR=`echo $URL.$WORD | sed -e "s/http:\/\///g"| sed -e "s/ /_/g"| sed -e "s/\./_/g"| sed -e "s/\-/_/g"` |
||
| 108 | SEARCHWORD=`echo $WORD| sed -e "s/ /%20/g"` |
||
| 109 | |||
| 110 | until [ "$FOUND" -ne "0" ]; do |
||
| 111 | #Grab page from google for the WORD/PAGE combination. Pipe it through sed and awk to pull out the URLs only, one per line. Then dump only the lines containing the URL defined |
||
| 112 | wget -q --user-agent=Firefox -O - http://www.google.com/search?q=$SEARCHWORD\&num=100\&hl=en\&safe=off\&pwst=1\&start=$start\&sa=N > /tmp/google_rank.$LASTHIT.data |
||
| 113 | VALUE=`cat /tmp/google_rank.$LASTHIT.data|sed 's/<a href=\"\([^\"]*\)\" class=l>/\n\1\n/g'|awk -v num=$num -v base=$base '{ if ( $1 ~ /^http/ ) print base,num++,$NF }'|awk '{ print $2 " " $3}'|grep -i $URL| awk '{ print $1}'`
|
||
| 114 | VALUE=`echo $VALUE| awk '{ print $1}'`
|
||
| 115 | if [ "$VALUE" = "" ]; then |
||
| 116 | VALUE=-1 |
||
| 117 | let start="start + 100" |
||
| 118 | sleep 5 |
||
| 119 | else |
||
| 120 | FOUND=1 |
||
| 121 | let VALUE="$VALUE + $start" |
||
| 122 | fi |
||
| 123 | ### echo Start=$start Value=$VALUE Found=$FOUND |
||
| 124 | if [ "$start" -gt 500 ];then |
||
| 125 | FOUND=-1 |
||
| 126 | VALUE=-1 |
||
| 127 | fi |
||
| 128 | done |
||
| 129 | |||
| 130 | #Read through cache file saving to array |
||
| 131 | iLoop=1 |
||
| 132 | while read line ;do |
||
| 133 | Data[$iLoop]=$line |
||
| 134 | let iLoop="$iLoop +1" |
||
| 135 | done < /tmp/google_rank.cache |
||
| 136 | |||
| 137 | #replace one line with the new value grabbed from google |
||
| 138 | Data[$LASTHIT]="$VAR.value $VALUE" |
||
| 139 | |||
| 140 | #write data back |
||
| 141 | rm /tmp/google_rank.cache |
||
| 142 | for iLoop in `seq 1 10`; do |
||
| 143 | echo ${Data[$iLoop]} >> /tmp/google_rank.cache
|
||
| 144 | done |
||
| 145 | fi |
||
| 146 | |||
| 147 | #Reset counter to start |
||
| 148 | if [ "$LASTHIT" -gt 30 ]; then |
||
| 149 | echo 0 > /tmp/google_rank.status |
||
| 150 | fi |
||
| 151 | |||
| 152 | #Dump data to munin |
||
| 153 | while read line ;do |
||
| 154 | if [ "$line" != "" ]; then |
||
| 155 | echo $line |
||
| 156 | fi |
||
| 157 | done < /tmp/google_rank.cache |
||
| 158 | exit 0 |
