Projet

Général

Profil

Paste
Télécharger au format
Statistiques
| Branche: | Révision:

root / plugins / other / google-rank @ df3939be

Historique | Voir | Annoter | Télécharger (4,7 ko)

1
#!/bin/bash
2
# Simple munin plugin to find the google rank for a URL/WORD combination
3
#
4
# THIS SCRIPT BREAKS THE TOS OF GOOGLE SO USE WITH CARE AND DON'T BLAME ME IF THINGS GO WRONG
5
# 
6
# (c) 2009 i.dobson@planet-ian.com
7
#
8
# For each url/words that you want to watch you need to create a variable/word pair in your 
9
# munin-node configuration file for example
10
#
11
#[google_rank]
12
#user root
13
#timeout 60
14
#env.URL1 http://www.plant-ian.com
15
#env.WORD1 avr webcam
16
#env.URL2 http://www.plant-ian.com
17
#env.WORD2 bascom
18
#
19
# Version 0.5 24.1.2009
20
# Added loop to check the first 500 pages. Note the script sleeps 5 seconds between each page grab, so
21
# if the word/url you're looking for is in the higher positions then you need to increase the timeout
22
#  
23
# Version 0.5 21.1.2009
24
# Dump each page grabbed from google into separate files (helps with debugging) 
25
#
26
# Version 0.4 19.1.2009
27
# Fixed corrupt then empty cache file bug
28
#
29
# Version 0.3 19.1.2009 
30
#  The script now grabs the google page based on the LASTHIT counter.
31
#  The script grabs the google page for URL1, then the next time it's called URL2 etc. If the url/word pair doesn't exist for LASTHIT then the script just dumps the cached data
32
#
33
# Version 0.2 18.01.2009 
34
#  Cache added, the script only grabs the pages from google every 10 calls
35
#  The script still only checks the first 100 pages returned by google
36
#
37
# Version 0.1 17.01.2009 Initial release
38
#  The script only checks the first 100 pages returned by google
39
#
40

    
41
# Munin "autoconf" handler.
#
# Prints "yes" when the first URL/WORD pair (env.URL1 and env.WORD1) is
# configured, otherwise "no (reason)".
# Globals: URL1, WORD1 (read)
# Outputs: a single line on stdout
google_rank_autoconf() {
  if [[ -n "$URL1" && -n "$WORD1" ]]; then
    echo yes
  else
    echo 'no (env.URL1 and env.WORD1 must both be set)'
  fi
}

if [[ "$1" == "autoconf" ]]; then
  google_rank_autoconf
  # Munin expects autoconf to exit 0 whether the answer is yes or no; a
  # non-zero status is reported as a plugin failure rather than as "no".
  exit 0
fi
52

    
53
# Munin "config" handler.
#
# Prints the graph header, then one "<field>.label" line for every
# consecutive URLn/WORDn pair (n = 1, 2, ...) found in the environment.
# Enumeration stops at the first index whose URL or WORD is empty.
#
# The field name is "<URL>.<WORD>" with "http://" removed and every space,
# "." and "-" replaced by "_"; the fetch part of this script derives the
# same name when it writes "<field>.value" lines.
# Globals: URL1..URLn, WORD1..WORDn (read)
# Outputs: munin config lines on stdout
google_rank_config() {
  local IFS=$' \t\n'
  local scheme='http://'
  local idx=1
  local url_ref word_ref url word field display_url
  local -a parts

  echo 'graph_title Google page rank'
  echo 'graph_args --upper-limit 100 -l 0'
  echo 'graph_category other'
  echo 'graph_scale no'
  echo 'graph_info Google page rank for URLs & Words'

  while :; do
    url_ref="URL${idx}"
    word_ref="WORD${idx}"
    url=${!url_ref}
    word=${!word_ref}
    if [[ -z "$url" || -z "$word" ]]; then
      break
    fi

    # Split on whitespace and re-join with single spaces. This keeps the
    # whitespace squeezing the field name has always had, but without
    # letting the shell glob-expand characters such as "*" in the words.
    read -r -a parts <<<"${url}.${word}"
    field="${parts[*]}"
    field=${field//"$scheme"/}
    field=${field//[-. ]/_}

    display_url=${url//"$scheme"/}
    printf '%s.label Pagerank %s - %s\n' "$field" "$display_url" "$word"

    idx=$((idx + 1))
  done
}

if [[ "$1" == "config" ]]; then
  google_rank_config
  exit 0
fi
81

    
82
# Main part of the plugin: on every call, look up ONE url/word pair on
# google (the pair selected by the LASTHIT counter) and store the result in
# the cache file. Pairs that are not refreshed in this call keep their
# previously cached value.
#
# Files used (fixed paths shared with the rest of this script):
#   /tmp/google_rank.status     counter of the pair handled by the last call
#   /tmp/google_rank.cache      line N holds "<field>.value <rank>" of pair N
#   /tmp/google_rank.<N>.data   last result page downloaded for pair N
# NOTE(review): these are predictable names in /tmp and the example
# configuration runs the plugin as root; consider moving them to a private
# state directory in every part of the script at once.

# Collapse runs of whitespace in $1 into single spaces and trim both ends,
# without subjecting the text to glob expansion. Prints the result.
google_rank_squeeze() {
  local IFS=$' \t\n'
  local -a words
  read -r -a words <<<"$1"
  printf '%s' "${words[*]}"
}

# Read a google result page on stdin and print the 1-based position of the
# first result link containing URL ($1), or nothing when there is no match.
# Result links are the href targets of '<a href="..." class=l>' anchors;
# every line starting with "http" after splitting them out is counted.
# The URL is matched case-insensitively as a fixed string, so "." in the
# URL only matches a literal dot.
google_rank_parse_position() {
  local url=$1
  sed 's/<a href="\([^"]*\)" class=l>/\n\1\n/g' \
    | awk '$1 ~ /^http/ { print ++n "  " $NF }' \
    | grep -iF -- "$url" \
    | awk 'NR == 1 { print $1 }'
}

# Read, advance and save the pair counter. Anything that is not a plain
# number (empty or corrupt status file) restarts the counter at 0.
LASTHIT=0
if [[ -f /tmp/google_rank.status ]]; then
  read -r LASTHIT _ < /tmp/google_rank.status
fi
if [[ ! "$LASTHIT" =~ ^[0-9]+$ ]]; then
  LASTHIT=0
fi
LASTHIT=$((10#$LASTHIT + 1))
echo "$LASTHIT" > /tmp/google_rank.status

# Find the URL/WORD pair for the counter
TMPURL="URL${LASTHIT}"
URL=${!TMPURL}
TMPWORD="WORD${LASTHIT}"
WORD=${!TMPWORD}

if [[ -n "$URL" ]]; then
  start=0
  FOUND=0
  VALUE=-1

  # Field name: "<URL>.<WORD>" without "http://", with " ", "." and "-"
  # replaced by "_" (must match the name printed by the config section).
  scheme='http://'
  VAR=$(google_rank_squeeze "${URL}.${WORD}")
  VAR=${VAR//"$scheme"/}
  VAR=${VAR//[-. ]/_}

  # Search words with spaces encoded for use in the query string.
  SEARCHWORD=$(google_rank_squeeze "$WORD")
  SEARCHWORD=${SEARCHWORD// /%20}

  page_file="/tmp/google_rank.${LASTHIT}.data"

  # Walk through the result pages (100 results each, start=0..500) until
  # the URL is found. VALUE stays -1 when it is not found at all.
  until [[ "$FOUND" -ne 0 ]]; do
    if ! wget -q --user-agent=Firefox -O - \
      "http://www.google.com/search?q=${SEARCHWORD}&num=100&hl=en&safe=off&pwst=1&start=${start}&sa=N" \
      > "$page_file"; then
      # No point in sleeping through the remaining pages if the download
      # itself fails; report "not found" for this round.
      printf 'google_rank: download failed for "%s" (start=%s)\n' "$WORD" "$start" >&2
      FOUND=-1
      VALUE=-1
      break
    fi

    VALUE=$(google_rank_parse_position "$URL" < "$page_file")
    if [[ -z "$VALUE" ]]; then
      VALUE=-1
      start=$((start + 100))
      if ((start > 500)); then
        FOUND=-1
      else
        # Be gentle with google between two page grabs.
        sleep 5
      fi
    else
      FOUND=1
      VALUE=$((VALUE + start))
    fi
  done

  # Read the cache file into an array (line N -> Data[N]); a missing cache
  # (first run) simply yields an empty array.
  Data=()
  iLoop=1
  if [[ -r /tmp/google_rank.cache ]]; then
    while IFS= read -r line || [[ -n "$line" ]]; do
      Data[iLoop]=$line
      iLoop=$((iLoop + 1))
    done < /tmp/google_rank.cache
  fi
  last_index=$((iLoop - 1))

  # Replace one line with the new value grabbed from google
  Data[LASTHIT]="$VAR.value $VALUE"
  if ((LASTHIT > last_index)); then
    last_index=$LASTHIT
  fi

  # Write every slot back (empty slots become empty lines so that line
  # numbers keep matching pair numbers). The file is built under a fresh
  # name and renamed into place so readers never see a partial cache.
  if cache_tmp=$(mktemp /tmp/google_rank.cache.XXXXXX); then
    for ((iLoop = 1; iLoop <= last_index; iLoop++)); do
      printf '%s\n' "${Data[iLoop]}"
    done > "$cache_tmp"
    chmod 0644 "$cache_tmp"
    mv -f -- "$cache_tmp" /tmp/google_rank.cache
  else
    printf 'google_rank: cannot create temporary cache file\n' >&2
  fi
fi
146

    
147
# Restart the pair counter once it has passed 30, so that the next calls
# begin again with URL1/WORD1. LASTHIT is set earlier in the script; it is
# treated as 0 here if it is unset or empty.
if ((${LASTHIT:-0} > 30)); then
  echo 0 > /tmp/google_rank.status
fi

# Dump the cached values to munin, skipping the empty lines that stand for
# pairs without a value. Lines are printed verbatim (no glob expansion or
# backslash processing). A missing cache file (nothing fetched yet) simply
# produces no output.
if [[ -r /tmp/google_rank.cache ]]; then
  while read -r line || [[ -n "$line" ]]; do
    if [[ -n "$line" ]]; then
      printf '%s\n' "$line"
    fi
  done < /tmp/google_rank.cache
fi
exit 0
159