Projet

Général

Profil

Paste
Télécharger au format
Statistiques
| Branche: | Révision:

root / plugins / http / http_load_ @ caea7766

Historique | Voir | Annoter | Télécharger (15,8 ko)

1
#!/usr/bin/perl
2
# -*- perl -*-
3
#
4
# Plugin to graph http performance
5
# Version: 0.8.7
6
#
7
# The purpose of this plugin is to monitor several properties of a web page.
8
# All measurements are done for the complete web page, including images, css
9
# and other content a standard browser would download automatically.
10
#
11
# This version supports monitoring:
12
#   * The total time to download a complete web page (using serial GET requests)
13
#   * The total size of a web page
14
#   * The different response codes (200, 404, 500, etc)
15
#   * The different tags (img src, a href, etc)
16
#   * The different content types (image/png, text/css, etc)
17
#   * The number of elements the web page consists of
18
# 
19
# Author:  Espen Braastad / Linpro AS
20
#          espen@linpro.no
21
#
22
##### Short usage guide: #####
23
#
24
# Requirements:
25
#  * The server running this plugin must be allowed  to connect to the web 
26
#    server(s) you are going to monitor.
27
#  * Some perl modules: 
28
#    Time::HiRes, LWP::UserAgent, HTML::LinkExtor, LWP::ConnCache
29
#
30
# Initial configuration:
31
#  1. Copy this file to /usr/share/munin/plugins/
32
#
33
#  2. Create a file (/etc/munin/http_load_urls.txt) with one
34
#     full url per line, as many as you want, i.e.:
35
#      $ echo "http://www.dn.no/" >> /etc/munin/http_load_urls.txt
36
#      $ echo "http://www.intrafish.no/" >> /etc/munin/http_load_urls.txt
37
#
38
#  3. Add a cron job running the plugin with cron as the argument:
39
#     */15 * * * * <user> /usr/sbin/munin-run http_load_<site>_loadtime cron
40
#     <user> should be the user that has write permission to the $cachedir
41
#     directory set below. <site> should be any of the configured sites (all
42
#     sites will get updated), likewise, you should replace loadtime by any
43
#     metric that is enabled for that site (all metrics will get updated).
44
#     Set the intervals to whatever you want.
45
#
46
#     For verbose output (for debugging) you can do:
47
#     sudo -u <user> /usr/share/munin/plugins/http_load_ cron verbose
48
#
49
#  4. Run munin-node-configure --suggest --shell and run the symlink
50
#     commands manually to update the munin-node plugin list.
51
#  
52
# (5. If you want to change the filter which the plugin uses to select which
53
#     tags to follow in a web page, edit the subroutine called "filter" below.)
54
#
55
# Add a new url to monitor:
56
#  1. Add a new line in /etc/munin/http_load_urls.txt with the full URL, i.e.:
57
#      $ echo "http://www.linpro.no/" >> /etc/munin/http_load_urls.txt
58
#
59
#  2. Run munin-node-configure --suggest --shell and manually
60
#     add the new symlink(s)
61
#
62
#  3. /etc/init.d/munin-node restart
63
#
64
# Remove a url from monitoring:
65
#  1. Remove it from /etc/munin/http_load_urls.txt
66
#
67
#  2. Remove ${cachedir}/http_load_<url_id>*
68
#
69
#  3. Remove /etc/munin/plugins/http_load_<url_id>*
70
#
71
#  4. /etc/init.d/munin-node restart
72
#
73
#####
74
#
75
# Todo:
76
#   * Add support for forking to simulate real browsers
77
#   * Use checksums as fieldnames
78
#
79
# $Id: $
80
#
81
# Magic markers:
82
#%# family=auto
83
#%# capabilities=autoconf suggest
84

    
85
use strict;
86
use Time::HiRes qw( gettimeofday tv_interval );
87
use LWP::UserAgent;
88
use HTML::LinkExtor;
89
use LWP::ConnCache;
90

    
# --- Plugin settings -------------------------------------------------------

# File listing the monitored URLs, one full URL per line.
my $url_file="/etc/munin/http_load_urls.txt";

# Directory holding the per-URL cache files written by the "cron" run.
# Taken from the environment.  NOTE(review): presumably provided by
# munin-node/munin-run; confirm it is also set when the plugin is started
# by hand.
my $cachedir=$ENV{MUNIN_PLUGSTATE};

# Any true value switches on the "$debug && print ..." diagnostics.
my $debug=$ENV{MUNIN_DEBUG};

my $timeout=10;                # handed to LWP::UserAgent->timeout
my $max_redirects=10;          # handed to LWP::UserAgent->max_redirect
my $scriptname="http_load_";   # prefix of the cache file names
my $useragent="Mozilla/5.0";   # handed to LWP::UserAgent->agent
99

    
# Read the URL list and return it as a hash.
#
# Parameters:
#   $_[0] - path of the file holding one URL per line
# Returns:
#   A hash (as a flat list) mapping the id produced by get_id() to the URL.
#   Lines whose id is empty (for instance blank lines) are skipped.  A file
#   that is missing, unreadable or cannot be opened gives an empty hash.
sub read_urls{
	my ($file)=@_;
	my %urls=();

	# Not an error: "autoconf" relies on getting an empty hash back when
	# no URL file has been set up yet.
	return %urls unless defined $file and -r $file;

	# Lexical handle and three-argument open, so the file name can never be
	# interpreted as an open mode.  The result is checked because the file
	# may disappear between the -r test and the open.
	open(my $fh, '<', $file) or return %urls;
	while (my $url = <$fh>) {
		chomp($url);
		my $id=get_id($url);
		$urls{$id}=$url if length($id)>0;
	}
	close($fh);

	return %urls;
}
118

    
# Read a cache file and return its contents as a hash.
#
# Each line is split at the first run of whitespace: the text before it is
# the key, the rest of the line (which may itself contain whitespace) is the
# value.
#
# Parameters:
#   $_[0] - path of the cache file
# Returns:
#   A hash (as a flat list) of key => value.  A file that is missing,
#   unreadable or cannot be opened gives an empty hash.
sub read_cache{
	my ($file)=@_;
	my %cache=();

	return %cache unless defined $file and -r $file;

	open(my $fh, '<', $file) or return %cache;
	while (my $line = <$fh>) {
		# Store a pair only when the line really matched.  Without this
		# check $1/$2 would still hold the captures of an earlier line
		# (or be undefined), and a bogus entry would be written.
		next unless $line =~ m/^(\S*)\s+(.*)$/;
		$cache{$1}=$2;
	}
	close($fh);

	return %cache;
}
133

    
# Decide whether the target of an HTML link should be downloaded.
#
# Parameters:
#   $_[0] - "<element> <attribute>" of the link, e.g. "img src"
# Returns:
#   1 => do download (default)
#   0 => do not download
#
# Some example data (element, attribute, url):
# link href http://www.intrafish.no/template/include/css/intrafish.css
# script src http://www.intrafish.no/template/include/js/intrafish.js
# a href http://adserver.adtech.de/?adlink%7C2.0%7C405%7C119488%7C1%7C16%7CADTECH;grp=8491;loc=300;
# img src http://adserver.adtech.de/?adserv%7C2.0%7C405%7C119488%7C1%7C16%7CADTECH;grp=8491;
# area href http://go.vg.no/cgi-bin/go.cgi/sol/http://www.sol.no/sgo/vg/http://www.sol.no/underholdning/humor/?partnerid=vg
sub filter{
	my ($tag)=@_;
	$tag="" unless defined $tag;

	# Links that are never followed.  Edit this list to change which tags
	# the plugin downloads.
	my %skip=map { $_ => 1 } ("form action", "a href", "area href");

	return $skip{$tag} ? 0 : 1;
}
160

    
# Build the cache file name (without directory) for one URL.
#
# Parameters:
#   $_[0] - name prefix (the callers in this file pass $scriptname)
#   $_[1] - id of the URL; an undefined id is treated as an empty string
# Returns:
#   "<prefix><id>.cache"
# Side effect:
#   Prints the name when $debug is true.
sub get_cache_file_name{
	my ($scriptname, $id)=@_;
	$id="" unless defined $id;

	my $file=$scriptname . $id . ".cache";
	$debug && print "Cache file: " . $file . "\n";

	return $file;
}
172

    
# Turn a string into a munin-1.0 "compatible" field name.
#
# 1. All non-word characters are removed.
# 2. The result is limited to 19 characters:
#    2.1 If it has the form <host>_<info> (e.g. an HTTP response status
#        after the last underscore), the host part is shortened and the
#        "_<info>" suffix is kept intact.
#    2.2 Otherwise, or when the suffix alone would not fit into 19
#        characters, the whole string is cut after 19 characters.
#
# Parameters:
#   $_[0] - the raw name (an undefined value is treated as an empty string)
# Returns:
#   The field name, at most 19 characters long.
sub get_fieldname{
	my ($url)=@_;
	$url="" unless defined $url;

	$url =~ s/\W//g;
	return $url if length($url) <= 19;

	# A plain, checked match: the captures are only used when the pattern
	# really matched, and $url itself is left untouched for the fallback.
	# After the \W removal the string contains no whitespace, so the
	# anchored pattern splits at the last usable underscore.
	if ($url =~ m/^(\S+)_(\S+)$/) {
		my ($host, $info)=($1, $2);
		my $room=19 - (length($info) + 1);

		# A negative $room would make substr() count from the end of
		# $host and yield a name longer than 19 characters.
		return substr($host, 0, $room) . '_' . $info if $room >= 0;
	}

	return substr($url, 0, 19);
}
193

    
# Turn a URL into the id used in plugin (symlink) and cache file names.
#
# Like get_fieldname() this removes all non-word characters, but it also
# removes underscores and never shortens the result.  The id must not
# contain an underscore because the plugin name is later split into
# <id>_<type> at an underscore.
#
# Parameters:
#   $_[0] - the URL (an undefined value is treated as an empty string)
# Returns:
#   The id; empty when the input holds no letters or digits.
sub get_id{
	my ($url)=@_;
	return "" unless defined $url;

	$url =~ s/[\W_]//g;
	return $url;
}
200

    
$debug && print "Scriptname: " . $scriptname . "\n";

# Get the url id and the type of the graph from the name this plugin was
# started as ($0).
#
# The filename format is http_load_X_Y where
# X: The id of the URL as produced by get_id() (it contains no underscore);
#    this is what "suggest" prints in front of each type
# Y: The type of graph (elements, size, loadtime, ..)
#
# The X part is optional in the pattern; without it only $type is set.
# When the name does not match at all, both stay undefined.
my ($id,$type);
if($0 =~ /http_load(?:_([^_]+)|)_(.+)\s*$/){
	# Captures are only read after a successful match.
	($id,$type)=($1,$2);
}

$debug && print "Id: $id, Type: $type\n";
215

    
# ---------------------------------------------------------------------------
# Main program: dispatch on the first command line argument.
#
#   autoconf - print "yes" (exit 0) when at least one URL is configured,
#              otherwise "no" (exit 1)
#   suggest  - print the <id>_<type> names this plugin can be linked as
#   cron     - download every configured page and rewrite its cache file
#   config   - print the munin graph configuration for $id / $type
#   (other)  - print the cached values for $id / $type
# ---------------------------------------------------------------------------
my $mode = defined $ARGV[0] ? $ARGV[0] : "";

if ($mode eq "autoconf") {
	my %urls = read_urls($url_file);
	# Numeric comparison of the key count.
	if (keys(%urls) > 0) {
		print "yes\n";
		exit(0);
	} else {
		print "no\n";
		exit(1);
	}

} elsif ($mode eq "suggest") {
	# get the url list, print suggestions for usage
	my %urls = read_urls($url_file);
	for my $url_id (sort keys %urls) {
		$debug && print "id: $url_id => url: $urls{$url_id}\n";
		for my $graph (qw(size loadtime response tags type elements)) {
			print $url_id . "_" . $graph . "\n";
		}
	}
	exit(0);

} elsif ($mode eq "cron") {
	# This thing is run by cron and should write a cache file for
	# munin-node to read from
	_http_load_cron();
	exit(0);

} elsif ($mode eq "config") {
	_http_load_config();
	exit(0);

} else {
	_http_load_fetch();
}

# Add the figures of one HTTP response to the per-URL result hash.
#
# Parameters:
#   $output   - hash reference collecting the results
#   $host     - host the response is booked on
#   $response - the response object returned by $browser->get
#   $seconds  - time the request took
# Returns:
#   The length of the response content.
sub _http_load_record {
	my ($output, $host, $response, $seconds) = @_;

	my $size = length($response->content());
	$output->{"loadtime_" . $host} += sprintf("%.6f", $seconds);
	$output->{"size_" . $host} += $size;
	$output->{"response_" . $host . "_" . $response->code} += 1;
	$output->{"type_" . $response->content_type} += 1;

	return $size;
}

# "cron" mode: fetch every configured URL plus the elements it links to
# (as selected by filter()) and write one cache file per URL.
sub _http_load_cron {
	my $verbose = 0;
	if ((defined $ENV{MUNIN_DEBUG} and $ENV{MUNIN_DEBUG} eq "1")
		or (defined $ARGV[1] and $ARGV[1] eq "verbose")) {
		$verbose = 1;
		print "Verbose output\n";
	}

	# Without a cache directory the files would silently be aimed at "/".
	unless (defined $cachedir and length($cachedir) > 0) {
		die $scriptname . ": MUNIN_PLUGSTATE is not set, cannot locate the cache directory\n";
	}

	my %urls = read_urls($url_file);

	while (my ($url_id, $url) = each(%urls)) {
		$verbose && print "Fetching $url (id: $url_id)... \n";

		my %output = ();
		my $host = "";
		if ($url =~ m/\w+\:\/\/([^\/]+).*/) {
			$host = $1;
			$verbose && print " Host: $host\n";
		}

		$output{"url"} = $url;
		$output{"timestamp"} = time();
		$verbose && print " Timestamp: " . $output{"timestamp"} . "\n";

		my $browser = LWP::UserAgent->new();
		$browser->agent($useragent);
		$browser->timeout($timeout);
		$browser->max_redirect($max_redirects);
		$browser->conn_cache(LWP::ConnCache->new());

		# The clock has to be started BEFORE the request is sent;
		# starting it afterwards measures (almost) nothing.
		my $t0 = [gettimeofday];
		my $response = $browser->get($url);
		my $seconds = tv_interval($t0, [gettimeofday]);

		if ($response->is_success()) {
			$output{"elements_" . $host} += 1;
		}
		_http_load_record(\%output, $host, $response, $seconds);

		my $page_parser = HTML::LinkExtor->new(undef, $url);
		$page_parser->parse($response->content())->eof;
		my @links = $page_parser->links;
		$verbose && print " Processing links:\n";

		foreach my $link (@links) {
			# Only the first attribute/url pair of each link is used.
			my ($element, $attr, $suburl) = @$link;
			my $tag = $element . " " . $attr;

			$output{"tags_" . $element . "-" . $attr} += 1;

			if (filter($tag)) {
				$verbose && print "  Processing: " . $element . " " . $attr . " " . $suburl . "\n";

				# Extract the hostname and add it to the hash.
				# https links are recognised as well; otherwise
				# they would be booked on the previous host.
				if ($suburl =~ m{^https?://([^/]+)}i) {
					$host = $1;
					$output{"elements_" . $host} += 1;
				}

				$t0 = [gettimeofday];
				$response = $browser->get($suburl);
				$seconds = tv_interval($t0, [gettimeofday]);

				my $size = _http_load_record(\%output, $host, $response, $seconds);

				$verbose && print "              Response: " . $response->code . " Size: " . $size . "\n";
			} else {
				$verbose && print "  Skipping:   " . $element . " " . $attr . " " . $suburl . "\n";
			}
		}

		my $cachefile = $cachedir . "/" . get_cache_file_name($scriptname, $url_id);
		$debug && print "Reading cache file: " . $cachefile . "... ";

		my %input = read_cache($cachefile);

		$debug && print "done\n";

		# Every field known from the previous run is first set to "U";
		# fields that did not show up this time keep that value.
		$input{$_} = "U" for keys %input;

		# Adding new values
		for my $key (keys %output) {
			$input{$key} = $output{$key};
			$verbose && print " Result: " . $key . " -> " . $output{$key} . "\n";
		}

		# Writing the cache
		$verbose && print "Writing cache file: " . $cachefile . "... ";
		my $out;
		unless (open($out, '>', $cachefile)) {
			warn $scriptname . ": cannot write " . $cachefile . ": " . $! . "\n";
			next;
		}
		for my $key (sort keys %input) {
			print {$out} $key . " " . $input{$key} . "\n";
		}
		# Buffered write errors only show up when the handle is closed.
		close($out)
			or warn $scriptname . ": error closing " . $cachefile . ": " . $! . "\n";
		$verbose && print "done\n";
	}
	return;
}

# "config" mode: print the graph definition for $id / $type, with one field
# per matching key of the cache file.
sub _http_load_config {
	my %urls = read_urls($url_file);
	my $url  = (defined $id and defined $urls{$id}) ? $urls{$id} : "";
	my $kind = defined $type ? $type : "";

	print "graph_title $url $kind\n";
	print "graph_args -l 0 --base 1000\n";
	print "graph_category webserver\n";
	$debug && print "Reading cache file\n";
	my $cachefile = $cachedir . "/" . get_cache_file_name($scriptname, $id);
	my %cache = read_cache($cachefile);

	$debug && print "The cache file contains " . keys(%cache) . " lines\n";

	my $from_host = sub { return "from " . $_[0]; };

	# Everything that differs between the graph types.  "info" is a
	# sprintf template taking the URL, "label" turns the part of the cache
	# key after "<type>_" into the field label.
	my %graph = (
		size => {
			vlabel => "Bytes",
			max    => "20000000",
			info   => "This graph is generated by a set of serial GETs to calculate the total size of %s.\n",
			label  => $from_host,
		},
		loadtime => {
			vlabel => "Seconds",
			max    => "400",
			info   => "This graph is generated by a set of serial GETs to calculate the total time to load %s. "
				. "Note that browsers usually fork() the GET requests, resulting in a shorter total loading time.\n",
			label  => $from_host,
		},
		elements => {
			vlabel => "Number of elements",
			max    => "10000",
			info   => "This graph is generated by a set of serial GETs to count the number of elements (images, CSS files, etc) from %s.\n",
			label  => $from_host,
		},
		response => {
			vlabel => "Server response code count",
			max    => "10000",
			info   => "This graph is generated by a set of serial GETs to visualize the server response codes received while loading %s.\n",
			label  => sub {
				# The key part is "<host>_<code>"; split at the
				# last underscore with a checked match.
				my ($rest) = @_;
				return "$1 ($2)" if $rest =~ m/^(.+)_(\d+)$/;
				return $rest;
			},
		},
		type => {
			vlabel => "Content type count",
			max    => "100000",
			info   => "This graph is generated by a set of serial GETs to visualize the different content types %s consists of.\n",
			label  => sub { return $_[0]; },
		},
		tags => {
			vlabel => "HTML tag count",
			max    => "100000",
			info   => "This graph is generated by a set of serial GETs to visualize the different tags %s consists of.\n",
			label  => sub {
				(my $text = $_[0]) =~ s/\W/ /g;
				return $text;
			},
		},
	);

	# An unknown type only gets the three general lines printed above.
	my $spec = $graph{$kind};
	return unless $spec;

	print "graph_vlabel " . $spec->{vlabel} . "\n";
	print "graph_total Total\n";
	print "graph_info " . sprintf($spec->{info}, $url);

	# Plain ascending order, the same order the values are printed in.
	# ("sort reverse keys" sorts the reversed list, so it gives exactly
	# this order as well.)
	my $count = 0;
	for my $key (sort keys %cache) {
		next unless $key =~ m/^\Q$kind\E_(\S+)$/;
		my $rest = $1;
		my $name = get_fieldname($rest);

		print "$name.label " . $spec->{label}->($rest) . "\n";
		print "$name.min 0\n";
		print "$name.max " . $spec->{max} . "\n";
		# The first field is drawn as an area, the others are stacked
		# on top of it.
		print "$name.draw " . ($count == 0 ? "AREA" : "STACK") . "\n";
		$count += 1;
	}
	return;
}

# Default mode: print "<field>.value <value>" for every cache entry whose
# key starts with "<type>_".
sub _http_load_fetch {
	my $cachefile = $cachedir . "/" . get_cache_file_name($scriptname, $id);
	$debug && print "Reading cache file: " . $cachefile . "\n";
	my %cache = read_cache($cachefile);
	$debug && print "Number of lines in cache file: " . keys(%cache) . "\n";

	my $kind = defined $type ? $type : "";

	# The former second branch matched against $_, which is never set at
	# this point, so it could not fire; it has been dropped.
	for my $key (sort keys %cache) {
		next unless $key =~ m/^([A-Za-z]+)\_(\S+)$/;
		my ($prefix, $rest) = ($1, $2);
		next unless $prefix eq $kind;

		print get_fieldname($rest) . ".value " . $cache{$key} . "\n";
	}
	return;
}
583

    
584
# vim:syntax=perl