Revision 17f78427

ID 17f784270ae966ee9a13e9f5104a5b8f925b639e
Parent ef851f0c
Children d4320aea, 5b2396a9

Added by Lars Kruse over 7 years ago

Whitespace cleanup

  • remove trailing whitespace
  • remove empty lines at the end of files
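
For reference, this kind of cleanup can be reproduced with standard tools. A minimal shell sketch, assuming GNU sed and a single target file (the commands below are illustrative, not the ones actually used for this commit):

    FILE=plugins/google/google-rank

    # Remove trailing whitespace (spaces and tabs) from every line, in place.
    sed -i 's/[[:space:]]*$//' "$FILE"

    # Remove empty lines at the end of the file: buffer blank lines and only
    # emit them once another non-blank line follows, so blanks at EOF are dropped.
    awk 'NF { printf "%s", buf; buf=""; print } !NF { buf = buf "\n" }' "$FILE" > "$FILE.tmp" && mv "$FILE.tmp" "$FILE"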

View differences:

plugins/google/google-rank

@@ -2,10 +2,10 @@
 # Simple munin plugin to find the google rank for a URL/WORD combination
 #
 # THIS SCRIPT BREAKS THE TOS OF GOOGLE SO USE WITH CARE AND DON'T BLAME ME IF THINGS GO WRONG
-# 
+#
 # (c) 2009 i.dobson@planet-ian.com
 #
-# For each url/words that you want to watch you need to create a variable/word pair in your 
+# For each url/words that you want to watch you need to create a variable/word pair in your
 # munin-node configuration file for example
 #
 #[google_rank]
@@ -19,18 +19,18 @@
 # Version 0.5 24.1.2009
 # Added loop to check the first 500 pages. Note the script sleeps 5 seconds between each page grab so
 # If the word/url your looking for is in the higher positions then you need to increase the timeout
-#  
+#
 # Version 0.5 21.1.2009
-# Dump each page grabbed from google into separate files (helps with debugging) 
+# Dump each page grabbed from google into separate files (helps with debugging)
 #
 # Version 0.4 19.1.2009
 # Fixed corrupt then empty cache file bug
 #
-# Version 0.3 19.1.2009 
+# Version 0.3 19.1.2009
 #  The script now grabs the google page based on the LASTHIT counter.
 #  The script grabs the google page for URL1, then the next time it's called URL2 etc. If the url/word pair doesn't exist for LASTHIT then the script just dumps the cached data
 #
-# Version 0.2 18.01.2009 
+# Version 0.2 18.01.2009
 #  Cache added, the script only grabs the pages from google every 10 calls
 #  The script still only checks to first 100 pages returned by google
 #
@@ -70,7 +70,7 @@
     fi
     if [ "$WORD" = "" ]; then
       exit 0
-    fi 
+    fi
     VAR=`echo $URL.$WORD | sed -e "s/http:\/\///g"| sed -e "s/ /_/g"| sed -e "s/\./_/g"| sed -e "s/\-/_/g"`
     URL=`echo $URL| sed -e "s/http:\/\///g"`
     echo $VAR.label Pagerank $URL - $WORD
@@ -108,7 +108,7 @@
  SEARCHWORD=`echo $WORD| sed -e "s/ /%20/g"`
 
 until [ "$FOUND" -ne "0" ]; do
-#Grab page from google for the WORD/PAGE combination.Pipe it into awk to pull out the url's only, one per line. Then dump only the lines containing the URL defined 
+#Grab page from google for the WORD/PAGE combination.Pipe it into awk to pull out the url's only, one per line. Then dump only the lines containing the URL defined
     wget -q --user-agent=Firefox -O - http://www.google.com/search?q=$SEARCHWORD\&num=100\&hl=en\&safe=off\&pwst=1\&start=$start\&sa=N > /tmp/google_rank.$LASTHIT.data
     VALUE=`cat /tmp/google_rank.$LASTHIT.data|sed 's/<a href=\"\([^\"]*\)\" class=l>/\n\1\n/g'|awk -v num=$num -v base=$base '{ if ( $1 ~ /^http/ ) print base,num++,$NF }'|awk '{ print $2 "  " $3}'|grep -i $URL| awk '{ print $1}'`
     VALUE=`echo $VALUE| awk '{ print $1}'`
@@ -117,7 +117,7 @@
       let start="start + 100"
       sleep 5
     else
-      FOUND=1 
+      FOUND=1
       let VALUE="$VALUE + $start"
     fi
 ###    echo Start=$start Value=$VALUE Found=$FOUND
@@ -139,12 +139,12 @@
 
 #write data back
   rm /tmp/google_rank.cache
-  for iLoop in `seq 1 10`; do 
+  for iLoop in `seq 1 10`; do
     echo ${Data[$iLoop]} >> /tmp/google_rank.cache
   done
 fi
 
-#Reset counter to start 
+#Reset counter to start
   if [ "$LASTHIT" -gt 30 ]; then
      echo 0 > /tmp/google_rank.status
   fi
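
The plugin header above says that each URL/word pair to watch needs a matching entry in the munin-node plugin configuration under a [google_rank] section, but the example lines themselves are elided in this excerpt. The stanza below is therefore only a hypothetical illustration; the env.URL1/env.WORD1 names are an assumption, not taken from the script:

    # /etc/munin/plugin-conf.d/google_rank  (hypothetical example)
    [google_rank]
    env.URL1 www.example.com
    env.WORD1 example search term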
