root / plugins / http / http_load_ @ 17f78427
Historique | Voir | Annoter | Télécharger (19,1 ko)
| 1 |
#!/usr/bin/perl |
|---|---|
| 2 |
# -*- perl -*- |
| 3 |
|
| 4 |
=head1 NAME |
| 5 |
|
| 6 |
http_load_ - Munin multigraph plugin to monitor websites' HTTP responses and performance |
| 7 |
|
| 8 |
=head1 DESCRIPTION |
| 9 |
|
| 10 |
The purpose of this plugin is to monitor several properties of a web page. |
| 11 |
All measurements are done for the complete web page, including images, css |
| 12 |
and other content a standard browser would download automatically. |
| 13 |
|
| 14 |
This version supports monitoring: |
| 15 |
- loadtime: total time to download a complete web page (using serial GET requests) |
| 16 |
- size: total size of a web page |
| 17 |
- response: different response codes (200, 404, 500, etc) |
| 18 |
- tags: HTML tags (img src, a href, etc) |
| 19 |
- type: content types (image/png, text/css, etc) |
| 20 |
- elements: source of elements loaded by the web page |
| 21 |
|
| 22 |
=head1 REQUIREMENTS |
| 23 |
|
| 24 |
- The server running this plugin must be allowed to connect to the web |
| 25 |
server(s) you are going to monitor. |
| 26 |
- Some perl modules: |
| 27 |
Time::HiRes, LWP::UserAgent, HTML::LinkExtor, LWP::ConnCache |
| 28 |
|
| 29 |
=head1 CONFIGURATION |
| 30 |
|
| 31 |
=head2 INITIAL SETUP |
| 32 |
|
| 33 |
1. Copy this file to /usr/share/munin/plugins/ |
| 34 |
|
| 35 |
2. Create a file (/etc/munin/http_load_urls.txt) with one |
| 36 |
full url per line, as many as you want, i.e.: |
| 37 |
$ echo "http://www.dn.no/" >> /etc/munin/http_load_urls.txt |
| 38 |
$ echo "http://www.intrafish.no/" >> /etc/munin/http_load_urls.txt |
| 39 |
|
| 40 |
3. Add a cron job running the plugin with cron as the argument: |
| 41 |
*/15 * * * * <user> /usr/sbin/munin-run http_load_<site>_loadtime cron |
| 42 |
<user> should be the user that has write permission to the $cachedir |
| 43 |
directory set below. <site> should be any of the configured sites (all |
| 44 |
sites will get updated), likewise, you should replace loadtime by any |
| 45 |
metric that is enabled for that site (all metrics will get updated). |
| 46 |
Set the intervals to whatever you want. |
| 47 |
|
| 48 |
For verbose output (for debugging) you can do: |
| 49 |
sudo -u <user> /usr/share/munin/plugins/http_load_ cron verbose |
| 50 |
|
| 51 |
4. Run munin-node-configure --suggest --shell and run the symlink |
| 52 |
commands manually to update the munin-node plugin list. |
| 53 |
|
| 54 |
5. If you want to change the filter which the plugin uses to select which |
| 55 |
tags to follow in a web page, edit the subroutine called "filter" below. |
| 56 |
|
| 57 |
=head2 SPECIFY URLS TO MONITOR |
| 58 |
|
| 59 |
1. Add a new line in /etc/munin/http_load_urls.txt with the full URL, i.e.: |
| 60 |
$ echo "http://www.linpro.no/" >> /etc/munin/http_load_urls.txt |
| 61 |
|
| 62 |
2. Run munin-node-configure --suggest --shell and manually |
| 63 |
add the new symlink(s) |
| 64 |
|
| 65 |
3. /etc/init.d/munin-node restart |
| 66 |
|
| 67 |
=head2 REMOVE A URL |
| 68 |
|
| 69 |
1. Remove it from /etc/munin/http_load_urls.txt |
| 70 |
|
| 71 |
2. Remove ${cachedir}/http_load_<url_id>*
|
| 72 |
|
| 73 |
3. Remove /etc/munin/plugins/http_load_<url_id>* |
| 74 |
|
| 75 |
4. /etc/init.d/munin-node restart |
| 76 |
|
| 77 |
=head2 SINGLE GRAPH SUPPORT |
| 78 |
|
| 79 |
The default behaviour is the multigraph mode: only the loadtime will be shown |
| 80 |
on the Munin summary page. The graphs there are linked to a second-level |
| 81 |
summary page that list all other metrics. It is also possible to create |
| 82 |
single graphs, that would show immediately on the summary page, by using |
| 83 |
symlinks with a different name, postfixed with the name of the metric: |
| 84 |
|
| 85 |
- http_load_hostname: multigraph (default) |
| 86 |
- http_load_hostname_loadtime: loadtime only |
| 87 |
- http_load_hostname_size: total page size |
| 88 |
- http_load_hostname_response: response code |
| 89 |
- http_load_hostname_tags: HTML tags summary |
| 90 |
- http_load_hostname_type: Content-Types |
| 91 |
- http_load_hostname_elements: source site of the loaded elements |
| 92 |
|
| 93 |
Note that hostname is not the FQDN of the host, but rather the URL identifier |
| 94 |
suggested by munin-node-configure --suggest --shell. |
| 95 |
|
| 96 |
=head1 MAGIC MARKERS |
| 97 |
|
| 98 |
#%# family=auto |
| 99 |
#%# capabilities=autoconf suggest |
| 100 |
|
| 101 |
=head1 TODO |
| 102 |
|
| 103 |
- Specify URLs from a standard Munin plugins configuration file (e.g., env.urls) |
| 104 |
- Add support for forking to simulate real browsers |
| 105 |
|
| 106 |
=head1 AUTHORS |
| 107 |
|
| 108 |
- Espen Braastad / Linpro AS <espen@linpro.no>, initial implementation |
| 109 |
- Olivier Mehani <shtrom+munin@ssji.net>, multigraph support |
| 110 |
|
| 111 |
=cut |
| 112 |
|
| 113 |
use strict; |
| 114 |
use Time::HiRes qw( gettimeofday tv_interval ); |
| 115 |
use LWP::UserAgent; |
| 116 |
use HTML::LinkExtor; |
| 117 |
use LWP::ConnCache; |
| 118 |
|
| 119 |
# Plugin-wide settings.
my $url_file      = '/etc/munin/http_load_urls.txt';   # list of URLs, one per line
my $cachedir      = $ENV{'MUNIN_PLUGSTATE'};           # directory holding the cache files

my $debug         = $ENV{'MUNIN_DEBUG'};               # true => print diagnostics
my $timeout       = 10;                                # handed to LWP::UserAgent->timeout
my $max_redirects = 10;                                # handed to LWP::UserAgent->max_redirect
my $scriptname    = 'http_load_';                      # prefix of the cache file names
my $useragent     = 'Mozilla/5.0 (Munin; ' . $scriptname . ')';
| 127 |
|
| 128 |
# Read the list of URLs to monitor.
#
# Parameters: $_[0] - path of a text file holding one URL per line.
# Returns:    a hash (as a list) mapping get_id($url) => $url. Lines whose id
#             is empty (e.g. blank lines) are skipped. A missing, unreadable
#             or unopenable file yields an empty hash.
sub read_urls{
    my ($file) = @_;
    my %urls = ();

    # "No file" simply means "no URLs": autoconf and suggest rely on an empty
    # result here rather than on an exception.
    return %urls unless defined($file) and -r $file;

    # Three-argument open with a lexical handle: the path is never parsed for
    # mode characters and no global FILE handle is shared with other subs.
    open(my $fh, '<', $file) or return %urls;
    while (my $url = <$fh>) {
        chomp($url);
        my $id = get_id($url);
        $urls{$id} = $url if length($id) > 0;
    }
    close($fh);

    return %urls;
}
| 146 |
|
| 147 |
# Read a cache file.
#
# Parameters: $_[0] - path of the cache file; each line is "<key> <value>".
# Returns:    a hash (as a list) of key => value. A missing, unreadable or
#             unopenable file yields an empty hash.
sub read_cache{
    my ($file) = @_;
    my %cache = ();

    return %cache unless defined($file) and -r $file;

    # Three-argument open with a lexical handle instead of the global FILE.
    open(my $fh, '<', $file) or return %cache;
    while (my $line = <$fh>) {
        # Skip lines that do not have the "<key><whitespace><value>" shape
        # (e.g. a truncated last line). Using $1/$2 after a failed match
        # would store the captures of an earlier, unrelated match.
        next unless $line =~ m/^(\S*)\s+(.*)$/;
        $cache{$1} = $2;
    }
    close($fh);

    return %cache;
}
| 161 |
|
| 162 |
# Decide whether the resource referenced by an HTML tag gets downloaded.
#
# Parameters: $_[0] - "<tag> <attribute>" as built by the cron run, e.g.
#             "img src", "script src", "a href"; for <link> elements the
#             second word may be the rel value, e.g. "link stylesheet".
# Returns:    1 => download the resource (default), 0 => skip it.
#
# Edit the rules below to change which tags the plugin follows.
sub filter{
    my ($tag) = @_;

    # Tags whose target is never fetched.
    my %never_fetched = (
        "form action"  => 1,
        "a href"       => 1,
        "area href"    => 1,
        "meta content" => 1,
    );
    return 0 if exists $never_fetched{"$tag"};

    # For links, the 'rel' is more relevant than the attribute name: only
    # stylesheets are fetched.
    if ("$tag" =~ /^link/) {
        return ("$tag" =~ /stylesheet$/) ? 1 : 0;
    }

    # Anything else (img src, script src, ...) is fetched.
    return 1;
}
| 198 |
|
| 199 |
# Build the name (without directory) of the cache file of one URL.
#
# Parameters: $_[0] - script name prefix; $_[1] - id of the URL.
# Returns:    "<prefix><id>.cache". The name is also printed when the
#             file-level $debug flag is true.
sub get_cache_file_name{
    my ($prefix, $id) = @_;

    my $file = $prefix . $id . ".cache";
    print "Cache file: " . $file . "\n" if $debug;

    return $file;
}
| 210 |
|
| 211 |
# Get fieldname (making sure it is munin-1.0 "compatible" as a fieldname)
#
# 1. Remove all non-word characters from the string.
# 2. Make sure it has at most 19 characters. A name of the form
#    <host>_<info> (e.g. a host followed by an HTTP response status) is
#    shortened by truncating the host part and keeping "_<info>".
#
# Parameters: $_[0] - raw name, e.g. "www.example.com_200".
# Returns:    the sanitised name, never longer than 19 characters.
sub get_fieldname{
    my ($url) = @_;
    my $max_length = 19;

    $url =~ s/\W//g;
    return $url if length($url) <= $max_length;

    # Test the match explicitly: $1/$2 are only meaningful after a successful
    # match; after a failed one they still hold whatever an earlier regex
    # (possibly one in the caller) captured.
    if ($url =~ m/^(\S+)_(\S+)$/) {
        my ($host, $info) = ($1, $2);
        my $room = $max_length - (length($info) + 1);

        # Keep the suffix only while at least one host character still fits;
        # a non-positive length would make substr() count from the end and
        # return a name that is too long.
        return substr($host, 0, $room) . '_' . $info if $room > 0;
    }

    return substr($url, 0, $max_length);
}
| 231 |
|
| 232 |
# Derive the identifier of a URL: the URL with every non-word character and
# every underscore removed. Unlike get_fieldname, the result is not truncated.
#
# Parameters: $_[0] - the URL.
# Returns:    the identifier, e.g. "httpwwwexamplecom" for
#             "http://www.example.com/".
sub get_id{
    (my $id = $_[0]) =~ s/[\W_]//g;
    return $id;
}
| 238 |
|
| 239 |
# Print the header lines common to every graph: title, graph arguments and
# category.
#
# Parameters: $_[0] - id of the URL;
#             $_[1] - reference to the id => URL hash;
#             $_[2] - name of the metric, appended to the title.
sub graph_title_config{
    my ($id, $urls_ref, $type) = @_;
    my %url_of = %{$urls_ref};

    print join("\n",
        "graph_title $url_of{$id} ${type}",
        "graph_args -l 0 --base 1000",
        "graph_category webserver",
    ) . "\n";
}
| 248 |
|
| 249 |
# Print the field definitions (label, min, max, draw) for all cache entries
# that belong to one metric. Private helper shared by the *_config subs.
#
# Parameters: $_[0] - reference to the cache hash (key => value);
#             $_[1] - metric name; only keys "<metric>_<rest>" are used;
#             $_[2] - value printed as <field>.max;
#             $_[3] - code reference turning <rest> into the field label.
sub _print_field_configs{
    my ($cache_ref, $metric, $max, $label_for) = @_;
    my $count = 0;

    # Ascending key order, the same order cache_values uses for the values.
    # (The previous "sort reverse keys" sorted the reversed key list, which
    # gives this very same order.)
    for my $key (sort keys %{$cache_ref}) {
        next unless $key =~ m/^\Q$metric\E_(\S+)$/;
        my $rest  = $1;
        my $name  = get_fieldname($rest);
        my $label = $label_for->($rest);

        print "$name.label $label\n";
        print "$name.min 0\n";
        print "$name.max $max\n";
        # The first field is drawn as an area, the following ones are
        # stacked on top of it. $count is a number, so compare numerically.
        print "$name.draw " . ($count == 0 ? "AREA" : "STACK") . "\n";
        $count += 1;
    }
}

# Print the munin configuration of the "size" graph.
#
# Parameters (identical for all *_config subs):
#             $_[0] - id of the URL;
#             $_[1] - reference to the id => URL hash;
#             $_[2] - reference to the cache hash of that URL.
sub size_config{
    my ($id, $urls_ref, $cache_ref) = @_;

    graph_title_config($id, $urls_ref, "size");

    print "graph_vlabel Bytes\n";
    print "graph_total Total\n";
    print "graph_info This graph is generated by a set of serial GETs to calculate the total size of $urls_ref->{$id}.\n";

    _print_field_configs($cache_ref, "size", "20000000", sub { return "from $_[0]"; });
}

# Print the munin configuration of the "loadtime" graph.
sub loadtime_config{
    my ($id, $urls_ref, $cache_ref) = @_;

    graph_title_config($id, $urls_ref, "loadtime");

    print "graph_vlabel Seconds\n";
    print "graph_total Total\n";
    print "graph_info This graph is generated by a set of serial GETs to calculate the total time to load $urls_ref->{$id}. ";
    print "Note that browsers usually fork() the GET requests, resulting in a shorter total loading time.\n";

    _print_field_configs($cache_ref, "loadtime", "400", sub { return "from $_[0]"; });
}

# Print the munin configuration of the "elements" graph.
sub elements_config{
    my ($id, $urls_ref, $cache_ref) = @_;

    graph_title_config($id, $urls_ref, "elements");

    print "graph_vlabel Number of elements\n";
    print "graph_total Total\n";
    print "graph_info This graph is generated by a set of serial GETs to count the number of elements (images, CSS files, etc) from $urls_ref->{$id}.\n";

    _print_field_configs($cache_ref, "elements", "10000", sub { return "from $_[0]"; });
}

# Print the munin configuration of the "response" graph.
sub response_config{
    my ($id, $urls_ref, $cache_ref) = @_;

    graph_title_config($id, $urls_ref, "response");

    print "graph_vlabel Server response code count\n";
    print "graph_total Total\n";
    print "graph_info This graph is generated by a set of serial GETs to visualize the server response codes received while loading $urls_ref->{$id}.\n";

    _print_field_configs($cache_ref, "response", "10000", sub {
        my ($rest) = @_;
        # <rest> is "<host>_<code>". Split on the LAST underscore so a host
        # that itself contains underscores is kept whole, and only use the
        # captures when the match succeeded.
        return "$1 ($2)" if $rest =~ m/^(.*)_(\d+)$/;
        return $rest;
    });
}

# Print the munin configuration of the "type" (content type) graph.
sub type_config{
    my ($id, $urls_ref, $cache_ref) = @_;

    graph_title_config($id, $urls_ref, "type");

    print "graph_vlabel Content type count\n";
    print "graph_total Total\n";
    print "graph_info This graph is generated by a set of serial GETs to visualize the different content types $urls_ref->{$id} consists of.\n";

    _print_field_configs($cache_ref, "type", "100000", sub { return $_[0]; });
}

# Print the munin configuration of the "tags" graph.
sub tags_config{
    my ($id, $urls_ref, $cache_ref) = @_;

    graph_title_config($id, $urls_ref, "tags");

    print "graph_vlabel HTML tag count\n";
    print "graph_total Total\n";
    print "graph_info This graph is generated by a set of serial GETs to visualize the different tags $urls_ref->{$id} consists of.\n";

    _print_field_configs($cache_ref, "tags", "100000", sub {
        # Cache keys look like "tags_<tag>-<attribute>"; show the
        # separator as a blank in the label.
        (my $label = $_[0]) =~ s/\W/ /g;
        return $label;
    });
}
| 489 |
|
| 490 |
# Print "<field>.value <value>" for every cache entry of one metric.
#
# Parameters: $_[0] - reference to the cache hash (key => value);
#             $_[1] - metric name (e.g. loadtime, size, elements, response,
#                     type, tags).
sub cache_values{
    my ($cache_ref, $type) = @_;

    for my $key (sort keys %{$cache_ref}) {
        # Graph data is stored under "<metric>_<rest>". Keys without an
        # underscore (the cron run also stores "url" and "timestamp") are
        # not graph data and are ignored.
        next unless $key =~ m/^([A-Za-z]+)_(\S+)$/;
        my ($metric, $rest) = ($1, $2);
        next unless $metric eq $type;

        print get_fieldname($rest) . ".value " . $cache_ref->{$key} . "\n";
    }

    # A former fallback branch tested the implicit $_ (not $key) against
    # /^(\S+)\s+(\S+)$/. It could never apply to a cache key and was removed.
}
| 512 |
|
| 513 |
# Print the configuration of the multigraph: the top-level graph, which
# shows the load time, followed by one sub-graph per metric.
#
# Parameters: $_[0] - id of the URL;
#             $_[1] - reference to the id => URL hash;
#             $_[2] - reference to the cache hash of that URL.
sub multi_config{
    my ($id, $urls_ref, $cache_ref) = @_;
    my %urls  = %{$urls_ref};
    my %cache = %{$cache_ref};

    # Top-level graph.
    print "multigraph http_load_$id\n";
    loadtime_config($id, \%urls, \%cache);

    # Sub-graphs, in the order they are printed.
    my @subgraphs = (
        [ "loadtime", \&loadtime_config ],
        [ "size",     \&size_config     ],
        [ "elements", \&elements_config ],
        [ "response", \&response_config ],
        [ "type",     \&type_config     ],
        [ "tags",     \&tags_config     ],
    );
    for my $subgraph (@subgraphs) {
        my ($metric, $print_config) = @{$subgraph};
        print "\nmultigraph http_load_$id.$metric\n";
        $print_config->($id, \%urls, \%cache);
    }
}
| 543 |
|
| 544 |
# Print the values of the multigraph: the top-level graph, which repeats the
# load time values, followed by one sub-graph per metric.
#
# Parameters: $_[0] - id of the URL;
#             $_[1] - reference to the cache hash of that URL.
sub multi_values{
    my ($id, $cache_ref) = @_;
    my %cache = %{$cache_ref};

    # Top-level graph.
    print "multigraph http_load_$id\n";
    cache_values(\%cache, "loadtime");

    # Sub-graphs, in the order they are printed.
    for my $metric (qw(loadtime size elements response type tags)) {
        print "\nmultigraph http_load_$id.$metric\n";
        cache_values(\%cache, $metric);
    }
}
| 573 |
$debug && print "Scriptname: " . $scriptname . "\n";

# Work out the URL id and the graph type from the name the plugin was
# invoked as (normally a symlink).
#
# The filename format is http_load_X or http_load_X_Y where
#    X: the id of the URL, as computed by get_id and printed by "suggest"
#    Y: the type of graph (elements, size, loadtime, ..); without it the
#       plugin runs in multigraph mode

my ($id,$type);
# Only read the captures after a successful match; after a failed one they
# would hold leftovers from an unrelated regex.
if ($0 =~ /http_load(?:_([^_]+)|)(_(.+))?\s*$/) {
    $id   = $1;
    $type = $3;
}
if (!defined($type) or $type eq "") {
    $type = "multi";
}

$debug && print "Id: $id, Type: $type\n";

if($ARGV[0] and $ARGV[0] eq "autoconf") {
    my %urls = read_urls($url_file);
    # keys() in numeric context is a count, so compare numerically.
    if (keys(%urls) > 0) {
        print "yes\n";
    } else {
        # Munin expects "no (reason)" together with a zero exit status.
        print "no (no URL found in $url_file)\n";
    }
    exit(0);

} elsif($ARGV[0] and $ARGV[0] eq "suggest") {
    # get the url list, print suggestions for usage
    my %urls = read_urls($url_file);
    while ( my ($id, $url) = each(%urls) ) {
        $debug && print "id: $id => url: $url\n";
        print $id . "\n";
    }
    exit(0);

} elsif($ARGV[0] and $ARGV[0] eq "cron") {
    # This thing is run by cron and should write a cache file for munin-node to
    # read from

    my $verbose=0;
    if(
        (defined($ENV{MUNIN_DEBUG}) and $ENV{MUNIN_DEBUG} eq "1") or
        ($ARGV[1] and $ARGV[1] eq "verbose")
    ) {
        $verbose=1;
        print "Verbose output\n";
    }

    my %urls = read_urls($url_file);

    # For <link />s, also capture the rel attribute
    $HTML::Tagset::linkElements{'link'} = [ qw( href rel ) ];

    while ( my ($id, $url) = each(%urls) ) {
        $verbose && print "Fetching $url (id: $id)... \n";

        my %output = ();
        my $host = "";
        if($url =~ m/\w+\:\/\/([^\/]+).*/){
            $host=$1;
            $verbose && print " Host: $host\n";
        }

        $output{"url"}=$url;
        $output{"timestamp"}=time();
        $verbose && print " Timestamp: " . $output{"timestamp"} . "\n";

        my $browser = LWP::UserAgent->new();

        $browser->agent($useragent);
        $browser->timeout($timeout);
        $browser->max_redirect($max_redirects);
        $browser->conn_cache(LWP::ConnCache->new());

        # Start the clock BEFORE issuing the request. It used to be started
        # after get() had returned, so the main page contributed (almost)
        # nothing to the load time.
        my $t0 = [gettimeofday];
        my $response = $browser->get($url);
        $output{"loadtime_" . $host} += sprintf("%.6f",tv_interval ( $t0, [gettimeofday]));

        if ($response->is_success()) {
            $output{"elements_" . $host}+=1;
        }

        my $contents = $response->content();
        $output{"size_" . $host}+=length($contents);
        $output{"response_" . $host . "_" . $response->code}+=1;
        $output{"type_" . $response->content_type}+=1;

        my $page_parser = HTML::LinkExtor->new(undef, $url);
        $page_parser->parse($contents)->eof;
        my @links = $page_parser->links;
        $verbose && print " Processing links:\n";

        foreach my $link (@links){
            # Each link is [ tag, attribute => value, ... ]
            my ($element, %attrs) = @{$link};

            # For <link>, classify by the last path component of rel (e.g.
            # "stylesheet") rather than by the name of the first attribute.
            my $tag;
            if (defined($attrs{rel}) and $attrs{rel} =~ /.*\/([^\/]+)/) {
                $tag=$element . " " . $1;
            } else {
                $tag=$element . " " . $$link[1];
            }
            $output{"tags_" . $element . "-" . $$link[1]}+=1;

            if(filter($tag)){
                $verbose && print " Processing: " . $$link[0] . " " . $$link[1] . " " . $$link[2] . "\n";

                # Extract the hostname and add it to the hash; a link that is
                # not http(s) is accounted to the previously seen host.
                if($$link[2] =~ m/https?\:\/\/([^\/]+).*/){
                    $host=$1;
                    $output{"elements_" . $host}+=1;
                }

                my $suburl=$$link[2];

                $t0 = [gettimeofday];
                $response = $browser->get($suburl);
                $output{"loadtime_" . $host} += sprintf("%.6f",tv_interval ( $t0, [gettimeofday]));

                $contents = $response->content();
                $output{"size_" . $host}+=length($contents);
                $output{"response_" . $host . "_" . $response->code}+=1;
                $output{"type_" . $response->content_type}+=1;

                $verbose && print " Response: " . $response->code . " Size: " . length($contents) . "\n";
            } else {
                $verbose && print " Skipping: " . $$link[0] . " " . $$link[1] . " " . $$link[2] . "\n";
            }
        }

        my $cachefile=$cachedir . "/" . get_cache_file_name($scriptname,$id);
        $debug && print "Reading cache file: " . $cachefile . "... ";

        my %input=read_cache($cachefile);

        $debug && print "done\n";

        # Mark every previously known key as unknown ("U") so that fields
        # missing from this run keep existing in the cache
        for my $key (keys %input) {
            $input{$key}="U";
        }

        # Adding new values
        for my $key (keys %output) {
            $input{$key}=$output{$key};
            $verbose && print " Result: " . $key . " -> " . $output{$key} . "\n";
        }

        # Writing the cache
        $verbose && print "Writing cache file: " . $cachefile . "... ";
        my $cache_fh;
        unless (open($cache_fh, '>', $cachefile)) {
            # Report the failure instead of printing to an unopened handle,
            # and carry on with the remaining URLs.
            warn "Cannot write cache file $cachefile: $!\n";
            next;
        }
        for my $key (sort keys %input) {
            print {$cache_fh} $key . " " . $input{$key} . "\n";
        }
        close($cache_fh) or warn "Cannot close cache file $cachefile: $!\n";
        $verbose && print "done\n";
    }
    exit(0);
}elsif($ARGV[0] and $ARGV[0] eq "config") {
    my %urls = read_urls($url_file);

    $debug && print "Reading cache file\n";
    my $cachefile=$cachedir . "/" . get_cache_file_name($scriptname,$id);
    my %cache=read_cache($cachefile);

    $debug && print "The cache file contains " . keys(%cache) . " lines\n";

    # One configuration printer per graph type; unknown types print nothing.
    my %config_for = (
        size     => \&size_config,
        loadtime => \&loadtime_config,
        elements => \&elements_config,
        response => \&response_config,
        type     => \&type_config,
        tags     => \&tags_config,
        multi    => \&multi_config,
    );
    if (exists $config_for{$type}) {
        $config_for{$type}->($id, \%urls, \%cache);
    }
    exit(0);
} else {
    # No (or an unknown) argument: print the values stored by the cron run
    my $cachefile=$cachedir . "/" . get_cache_file_name($scriptname,$id);
    $debug && print "Reading cache file: " . $cachefile . "\n";
    my %cache=read_cache($cachefile);
    $debug && print "Number of lines in cache file: " . keys(%cache) . "\n";

    if($type eq "multi"){
        multi_values($id, \%cache);
    } else {
        cache_values(\%cache, $type);
    }
}
| 777 |
|
| 778 |
# vim:syntax=perl |
