Projet

Général

Profil

Paste
Télécharger au format
Statistiques
| Branche: | Révision:

root / plugins / http / http_load_ @ 4b2fcbf8

Historique | Voir | Annoter | Télécharger (15,6 ko)

1
#!/usr/bin/perl
2
# -*- perl -*-
3
#
4
# Plugin to graph http performance
5
# Version: 0.8.7
6
#
7
# The purpose of this plugin is to monitor several properties of a web page.
8
# All measurements are done for the complete web page, including images, css
9
# and other content a standard browser would download automatically.
10
#
11
# This version supports monitoring:
12
#   * The total time to download a complete web page (using serial GET requests)
13
#   * The total size of a web page
14
#   * The different response codes (200, 404, 500, etc)
15
#   * The different tags (img src, a href, etc)
16
#   * The different content types (image/png, text/css, etc)
17
#   * The number of elements the web page consists of
18
# 
19
# Author:  Espen Braastad / Linpro AS
20
#          espen@linpro.no
21
#
22
##### Short usage guide: #####
23
#
24
# Requirements:
25
#  * The server running this plugin must be allowed  to connect to the web 
26
#    server(s) you are going to monitor.
27
#  * Some perl modules: 
28
#    Time::HiRes, LWP::UserAgent, HTML::LinkExtor, LWP::ConnCache
29
#
30
# Initial configuration:
31
#  1. Copy this file to /usr/share/munin/plugins/
32
#
33
#  2. Create a file (/etc/munin/http_load_urls.txt) with one
34
#     full url per line, as many as you want, i.e.:
35
#      $ echo "http://www.dn.no/" >> /etc/munin/http_load_urls.txt
36
#      $ echo "http://www.intrafish.no/" >> /etc/munin/http_load_urls.txt
37
#
38
#  3. Add a cron job running the plugin with cron as the argument:
39
#     */15 * * * * <user> /usr/share/munin/plugins/http_load_ cron
40
#     <user> should be the user that has write permission to
41
#     the $cachedir directory set below. Set the intervals to
42
#     whatever you want.
43
#
44
#     For verbose output (for debugging) you can do:
45
#     sudo -u <user> /usr/share/munin/plugins/http_load_ cron verbose
46
#
47
#  4. Run munin-node-configure --suggest --shell and run the symlink
48
#     commands manually to update the munin-node plugin list.
49
#  
50
# (5. If you want to change the filter which the plugin uses to select which
51
#     tags to follow in a web page, edit the subroutine called "filter" below.)
52
#
53
# Add a new url to monitor:
54
#  1. Add a new line in /etc/munin/http_load_urls.txt with the full URL, i.e.:
55
#      $ echo "http://www.linpro.no/" >> /etc/munin/http_load_urls.txt
56
#
57
#  2. Run munin-node-configure --suggest --shell and manually
58
#     add the new symlink(s)
59
#
60
#  3. /etc/init.d/munin-node restart
61
#
62
# Remove a url from monitoring:
63
#  1. Remove it from /etc/munin/http_load_urls.txt
64
#
65
#  2. Remove ${cachedir}/http_load_<url_id>*
66
#
67
#  3. Remove /etc/munin/plugins/http_load_<url_id>*
68
#
69
#  4. /etc/init.d/munin-node restart
70
#
71
#####
72
#
73
# Todo:
74
#   * Add support for forking to simulate real browsers
75
#   * Use checksums as fieldnames
76
#
77
# $Id: $
78
#
79
# Magic markers:
80
#%# family=auto
81
#%# capabilities=autoconf suggest
82

    
83
use strict;
84
use Time::HiRes qw( gettimeofday tv_interval );
85
use LWP::UserAgent;
86
use HTML::LinkExtor;
87
use LWP::ConnCache;
88

    
89
# File listing the urls to monitor, one full url per line.
my $url_file = "/etc/munin/http_load_urls.txt";

# Directory holding the cache files written in "cron" mode and read in
# "config" and fetch mode. Taken from the environment; it is undefined when
# MUNIN_PLUGSTATE is not set.
my $cachedir = $ENV{MUNIN_PLUGSTATE};

# Set to 1 to print debugging output in every mode.
my $debug = 0;

# Timeout, in seconds, handed to LWP::UserAgent for each request.
my $timeout = 10;

# Maximum number of redirects LWP::UserAgent follows per request.
my $max_redirects = 10;

# Prefix of the cache file names (see get_cache_file_name).
my $scriptname = "http_load_";

# User-Agent header sent with every request.
my $useragent = "Mozilla/5.0";
97

    
98
# Read the url list and return it as a hash (id => url).
#
# Arguments:
#   $_[0] - path of the file containing one url per line
#
# Returns a hash whose keys are the ids computed by get_id() and whose
# values are the urls (with the trailing newline removed). Lines for which
# get_id() yields an empty id (e.g. blank lines) are skipped. An unreadable
# or unopenable file yields an empty hash.
sub read_urls{
	my $file=$_[0];
	my %urls=();

	return %urls unless -r $file;

	# Lexical handle and three-argument open: no global FILE handle is
	# shared with read_cache, and a failed open is no longer silently
	# followed by reads from an unopened handle.
	open(my $fh, '<', $file) or return %urls;

	while (my $url = <$fh>) {
		chomp($url);
		my $id = get_id($url);
		$urls{$id} = $url if length($id) > 0;
	}
	close($fh);

	return %urls;
}
116

    
117
# Read a cache file and return its contents as a hash.
#
# Arguments:
#   $_[0] - path of the cache file
#
# Every line has the form "<key> <value>": the key is the first run of
# non-whitespace characters and the value is the rest of the line after the
# separating whitespace (the value may itself contain spaces).
#
# Returns a hash (key => value). Lines that do not start with a key are
# skipped. An unreadable or unopenable file yields an empty hash.
sub read_cache{
	my $file=$_[0];
	my %cache=();

	return %cache unless -r $file;
	open(my $fh, '<', $file) or return %cache;

	while (my $line = <$fh>) {
		# Only use the captures when the match succeeded; otherwise $1/$2
		# would still hold the values of an earlier match. Requiring a
		# non-empty key also keeps blank lines out of the hash.
		next unless $line =~ m/^(\S+)\s+(.*)$/;
		$cache{ $1 } = $2;
	}
	close($fh);

	return %cache;
}
131

    
132
# Decide whether the file referenced by an html tag should be downloaded.
#
# Arguments:
#   $_[0] - the tag name and attribute name separated by one space,
#           e.g. "img src" or "a href"
#
# Returns 1 when the referenced file should be downloaded (the default)
# and 0 when it should be skipped.
#
# Some example data (tag, attribute, url):
# link href http://www.intrafish.no/template/include/css/intrafish.css
# script src http://www.intrafish.no/template/include/js/intrafish.js
# a href http://adserver.adtech.de/?adlink%7C2.0%7C405%7C119488%7C1%7C16%7CADTECH;grp=8491;loc=300;
# img src http://adserver.adtech.de/?adserv%7C2.0%7C405%7C119488%7C1%7C16%7CADTECH;grp=8491;
# area href http://go.vg.no/cgi-bin/go.cgi/sol/http://www.sol.no/sgo/vg/http://www.sol.no/underholdning/humor/?partnerid=vg
sub filter{
	my $tag=$_[0];

	# Tags listed here are never followed; add or remove entries to change
	# what the plugin downloads.
	my %skip = map { $_ => 1 } (
		"form action",
		"a href",
		"area href",
	);

	return $skip{"$tag"} ? 0 : 1;
}
158

    
159
# Return the cache file name (without directory) for a monitored url.
#
# Arguments:
#   $_[0] - file name prefix
#   $_[1] - id of the url
#   $_[2] - graph type; accepted for the callers' sake but not used, all
#           graph types of one url share a single cache file
#
# Returns "<prefix><id>.cache". Prints the name when $debug is set.
sub get_cache_file_name{
	my ($scriptname, $id, $type) = @_;

	my $file = $scriptname . $id . ".cache";
	$debug && print "Cache file: " . $file . "\n";

	return $file;
}
171

    
172
# Get fieldname (making sure it is munin-1.0 "compatible" as a fieldname)
# 1. Remove all non-word characters from the string
# 2. Make sure it has maximum 19 characters
#    2.1 If it is longer and contains an underscore, truncate the part
#        before the last underscore (the host) and keep everything after it
#        (e.g., the HTTP response status)
#    2.2 If there is no underscore, or the part after it is too long to
#        leave room for the underscore, keep the first 19 characters
#
# Arguments:
#   $_[0] - the string to turn into a field name
#
# Returns the field name.
sub get_fieldname{
	my $url=$_[0];
	$url =~ s/\W//g;

	if(length($url) > 19){
		# The captures are read only inside the branch where the match
		# succeeded. Reading $1/$2 after a failed match would pick up the
		# captures of an earlier match, including one made by the caller.
		if ($url =~ m/^(\S+)_(\S+)$/) {
			my ($host, $info) = ($1, $2);
			my $room_for_host = 19 - (length($info) + 1);
			if ($room_for_host >= 0) {
				$url = substr($host, 0, $room_for_host) . '_' . $info;
			} else {
				# Suffix alone does not fit; a negative length would
				# make substr() return a name longer than 19 characters.
				$url = substr($url, 0, 19);
			}
		} else {
			$url = substr($url, 0, 19);
		}
	}
	return $url;
}
192

    
193
# Turn a url into the id used in symlink names, cache file names and as
# key of the url hash.
#
# Arguments:
#   $_[0] - the url
#
# Returns the url with every non-word character and every underscore
# removed. Unlike get_fieldname() the result is not truncated, and
# underscores are dropped because the id is separated from the graph type
# by an underscore in the symlink name.
sub get_id{
	my $url=$_[0];
	$url =~ s/[\W_]//g;
	return $url;
}
199

    
200
$debug && print "Scriptname: " . $scriptname . "\n";

# Get the url id and the type of the graph
#
# The plugin is run through a symlink named http_load_X_Y where
# X: The id of the url (the url with all non-word characters and
#    underscores removed, as printed by the "suggest" mode)
# Y: The type of graph (elements, size, loadtime, ..)
#
# When the program name has no X part (e.g. when the plugin is run directly
# as "http_load_ cron") $id is undefined; when the name does not match at
# all both variables are undefined.
my ($id,$type) = $0 =~ /http_load(?:_([^_]+)|)_(.+)\s*$/;

$debug && print "Id: $id, Type: $type\n";
214

    
215
# Settings for each graph type understood by the "config" mode, keyed by the
# graph type taken from the symlink name:
#   vlabel - value printed as graph_vlabel
#   info   - code ref building the graph_info text; argument: monitored url
#   max    - value printed as <field>.max
#   label  - code ref building the field label; argument: the part of the
#            cache key that follows "<type>_"
my %graph_for = (
	size => {
		vlabel => "Bytes",
		info   => sub { "This graph is generated by a set of serial GETs to calculate the total size of $_[0]." },
		max    => 20000000,
		label  => sub { "from $_[0]" },
	},
	loadtime => {
		vlabel => "Seconds",
		info   => sub { "This graph is generated by a set of serial GETs to calculate the total time to load $_[0]. Note that browsers usually fork() the GET requests, resulting in a shorter total loading time." },
		max    => 400,
		label  => sub { "from $_[0]" },
	},
	elements => {
		vlabel => "Number of elements",
		info   => sub { "This graph is generated by a set of serial GETs to count the number of elements (images, CSS files, etc) from $_[0]." },
		max    => 10000,
		label  => sub { "from $_[0]" },
	},
	response => {
		vlabel => "Server response code count",
		info   => sub { "This graph is generated by a set of serial GETs to visualize the server response codes received while loading $_[0]." },
		max    => 10000,
		# The key suffix is "<host>_<response code>".
		label  => sub { $_[0] =~ m/^(.*)_(\d+)$/ ? "$1 ($2)" : "$_[0] ()" },
	},
	type => {
		vlabel => "Content type count",
		info   => sub { "This graph is generated by a set of serial GETs to visualize the different content types $_[0] consists of." },
		max    => 100000,
		label  => sub { "$_[0]" },
	},
	tags => {
		vlabel => "HTML tag count",
		info   => sub { "This graph is generated by a set of serial GETs to visualize the different tags $_[0] consists of." },
		max    => 100000,
		# The key suffix is "<tag>-<attribute>"; show it as "<tag> <attribute>".
		label  => sub { my $text = $_[0]; $text =~ s/\W/ /g; $text },
	},
);

# Mode dispatch on the first command line argument:
#   autoconf - report whether any url is configured
#   suggest  - print the symlink suffixes for every configured url
#   cron     - download every url and write its cache file
#   config   - print the munin graph configuration from the cache file
#   (other)  - print the cached values for this graph
if ($ARGV[0] and $ARGV[0] eq "autoconf") {
	my %urls = read_urls($url_file);
	if (keys(%urls) > 0) {
		print "yes\n";
		exit(0);
	} else {
		print "no\n";
		exit(1);
	}

} elsif ($ARGV[0] and $ARGV[0] eq "suggest") {
	# get the url list, print suggestions for usage
	my %urls = read_urls($url_file);
	while ( my ($url_id, $url) = each(%urls) ) {
		$debug && print "id: $url_id => url: $url\n";
		for my $graph_type (qw(size loadtime response tags type elements)) {
			print $url_id . "_" . $graph_type . "\n";
		}
	}
	exit(0);

} elsif ($ARGV[0] and $ARGV[0] eq "cron") {
	# This thing is run by cron and should write a cache file for munin-node to
	# read from

	my $verbose = 0;
	if ($ARGV[1] and $ARGV[1] eq "verbose") {
		$verbose = 1;
		print "Verbose output\n";
	}

	my %urls = read_urls($url_file);

	while ( my ($url_id, $url) = each(%urls) ) {
		$verbose && print "Fetching $url (id: $url_id)... \n";

		# Values measured in this run, keyed "<type>_<detail>".
		my %output = ();

		# Host that sizes, load times, element counts and response codes
		# are attributed to; starts as the host of the page itself.
		my $host = "";
		if ($url =~ m/\w+\:\/\/([^\/]+).*/) {
			$host = $1;
			$verbose && print " Host: $host\n";
		}

		$output{"url"} = $url;
		$output{"timestamp"} = time();
		$verbose && print " Timestamp: " . $output{"timestamp"} . "\n";

		my $browser = LWP::UserAgent->new();
		$browser->agent($useragent);
		$browser->timeout($timeout);
		$browser->max_redirect($max_redirects);
		$browser->conn_cache(LWP::ConnCache->new());

		# Start the clock before the request so the download of the page
		# itself is part of the load time, as it is for every element below.
		my $t0 = [gettimeofday];
		my $response = $browser->get($url);
		$output{"loadtime_" . $host} += sprintf("%.6f", tv_interval($t0, [gettimeofday]));

		if ($response->is_success()) {
			$output{"elements_" . $host} += 1;
		}

		my $contents = $response->content();
		$output{"size_" . $host} += length($contents);
		$output{"response_" . $host . "_" . $response->code} += 1;
		$output{"type_" . $response->content_type} += 1;

		my $page_parser = HTML::LinkExtor->new(undef, $url);
		$page_parser->parse($contents)->eof;
		my @links = $page_parser->links;
		$verbose && print " Processing links:\n";

		foreach my $link (@links) {
			# Each link is [tag name, attribute name, url, ...]; only the
			# first attribute of a tag is looked at.
			my ($element, $attribute, $suburl) = @{$link}[0 .. 2];
			my $tag = $element . " " . $attribute;

			$output{"tags_" . $element . "-" . $attribute} += 1;

			if (!filter($tag)) {
				$verbose && print "  Skipping:   " . $element . " " . $attribute . " " . $suburl . "\n";
				next;
			}

			$verbose && print "  Processing: " . $element . " " . $attribute . " " . $suburl . "\n";

			# Extract the hostname and add it to the hash. Any scheme is
			# accepted (as for the page url above) so that https elements
			# are counted under their own host. For a url without a host
			# the previous host is kept.
			if ($suburl =~ m{^\w+://([^/]+)}) {
				$host = $1;
				$output{"elements_" . $host} += 1;
			}

			$t0 = [gettimeofday];
			$response = $browser->get($suburl);
			$output{"loadtime_" . $host} += sprintf("%.6f", tv_interval($t0, [gettimeofday]));

			$contents = $response->content();
			$output{"size_" . $host} += length($contents);
			$output{"response_" . $host . "_" . $response->code} += 1;
			$output{"type_" . $response->content_type} += 1;

			$verbose && print "              Response: " . $response->code . " Size: " . length($contents) . "\n";
		}

		my $cachefile = $cachedir . "/" . get_cache_file_name($scriptname, $url_id, $type);
		$debug && print "Reading cache file: " . $cachefile . "... ";

		my %input = read_cache($cachefile);

		$debug && print "done\n";

		# Fields known from earlier runs are kept but marked unknown ("U")
		# before the values of this run are filled in.
		$input{$_} = "U" for keys %input;

		# Adding new values
		for my $field (keys %output) {
			$input{$field} = $output{$field};
			$verbose && print " Result: " . $field . " -> " . $output{$field} . "\n";
		}

		# Writing the cache
		$verbose && print "Writing cache file: " . $cachefile . "... ";
		if (open(my $cache_fh, '>', $cachefile)) {
			for my $field (keys %input) {
				print {$cache_fh} $field . " " . $input{$field} . "\n";
			}
			close($cache_fh) or warn "Could not finish writing cache file $cachefile: $!\n";
			$verbose && print "done\n";
		} else {
			# Report the problem and carry on with the next url instead of
			# silently losing the measurements.
			warn "Could not write cache file $cachefile: $!\n";
		}
	}
	exit(0);

} elsif ($ARGV[0] and $ARGV[0] eq "config") {
	my %urls = read_urls($url_file);

	print "graph_title $urls{$id} ${type}\n";
	print "graph_args -l 0 --base 1000\n";
	print "graph_category webserver\n";
	$debug && print "Reading cache file\n";
	my $cachefile = $cachedir . "/" . get_cache_file_name($scriptname, $id, $type);
	my %cache = read_cache($cachefile);

	$debug && print "The cache file contains " . keys(%cache) . " lines\n";

	# Unknown graph types only get the three generic lines above.
	my $graph = defined($type) ? $graph_for{$type} : undef;
	if ($graph) {
		print "graph_vlabel " . $graph->{vlabel} . "\n";
		print "graph_total Total\n";
		print "graph_info " . $graph->{info}->($urls{$id}) . "\n";

		my $count = 0;
		# NOTE(review): the key order expression is kept exactly as the
		# plugin always had it; `reverse` appears to be applied before
		# `sort`, which would make this plain ascending order - confirm
		# before simplifying.
		for my $key ( sort reverse keys %cache ) {
			next unless $key =~ m/^\Q$type\E_(\S+)$/;
			my $suffix = $1;
			my $name = get_fieldname($suffix);

			print "$name.label " . $graph->{label}->($suffix) . "\n";
			print "$name.min 0\n";
			print "$name.max " . $graph->{max} . "\n";
			# The first field starts the area, all others are stacked on it.
			print "$name.draw " . ($count == 0 ? "AREA" : "STACK") . "\n";
			$count += 1;
		}
	}
	exit(0);

} else {
	my $cachefile = $cachedir . "/" . get_cache_file_name($scriptname, $id, $type);
	$debug && print "Reading cache file: " . $cachefile . "\n";
	my %cache = read_cache($cachefile);
	$debug && print "Number of lines in cache file: " . keys(%cache) . "\n";

	for my $key ( sort keys %cache ) {
		# Keys have the form "<type>_<detail>"; keys without an underscore
		# ("url", "timestamp") are bookkeeping entries, not graph values.
		next unless $key =~ m/^([A-Za-z]+)\_(\S+)$/;
		my ($key_type, $detail) = ($1, $2);
		next unless $key_type eq $type;

		print get_fieldname($detail) . ".value " . $cache{$key} . "\n";
	}
}
579

    
580
# vim:syntax=perl