#!/usr/bin/perl
# -*- perl -*-
#
# Plugin to graph http performance
# Version: 0.8.7
#
# The purpose of this plugin is to monitor several properties of a web page.
# All measurements are done for the complete web page, including images, css
# and other content a standard browser would download automatically.
#
# This version supports monitoring:
#   * The total time to download a complete web page (using serial GET requests)
#   * The total size of a web page
#   * The different response codes (200, 404, 500, etc)
#   * The different tags (img src, a href, etc)
#   * The different content types (image/png, text/css, etc)
#   * The number of elements the web page consists of
#
# Author:  Espen Braastad / Linpro AS
#          espen@linpro.no
#
##### Short usage guide: #####
#
# Requirements:
#  * The server running this plugin must be allowed to connect to the web
#    server(s) you are going to monitor.
#  * Some Perl modules:
#    Time::HiRes, LWP::UserAgent, HTML::LinkExtor, LWP::ConnCache
#
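#    To quickly check that the required modules are installed, you can run
#    this illustrative one-liner (not part of the plugin itself):
#     $ perl -MTime::HiRes -MLWP::UserAgent -MHTML::LinkExtor -MLWP::ConnCache -e 'print "ok\n"'
#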
# Initial configuration:
#  1. Copy this file to /usr/share/munin/plugins/
#
#  2. Create a file (/etc/munin/http_load_urls.txt) with one
#     full URL per line, as many as you want, e.g.:
#      $ echo "http://www.dn.no/" >> /etc/munin/http_load_urls.txt
#      $ echo "http://www.intrafish.no/" >> /etc/munin/http_load_urls.txt
#
#  3. Add a cron job running the plugin with cron as the argument:
#     */15 * * * * <user> /usr/share/munin/plugins/http_load_ cron
#     <user> should be the user that has write permission to
#     the $cachedir directory set below. Set the interval to
#     whatever you want.
#
#     For verbose output (for debugging) you can do:
#     sudo -u <user> /usr/share/munin/plugins/http_load_ cron verbose
#
#  4. Run munin-node-configure --suggest --shell and run the symlink
#     commands manually to update the munin-node plugin list (an example
#     symlink name is shown right after this guide).
#
# (5. If you want to change the filter which the plugin uses to select which
#     tags to follow in a web page, edit the subroutine called "filter" below.)
#
# Add a new URL to monitor:
#  1. Add a new line in /etc/munin/http_load_urls.txt with the full URL, e.g.:
#      $ echo "http://www.linpro.no/" >> /etc/munin/http_load_urls.txt
#
#  2. Run munin-node-configure --suggest --shell and manually
#     add the new symlink(s)
#
#  3. /etc/init.d/munin-node restart
#
# Remove a URL from monitoring:
#  1. Remove it from /etc/munin/http_load_urls.txt
#
#  2. Remove ${cachedir}/http_load_<url_id>*
#
#  3. Remove /etc/munin/plugins/http_load_<url_id>*
#
#  4. /etc/init.d/munin-node restart
#
#####
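#
# Example (illustrative only): if /etc/munin/http_load_urls.txt contains
# http://www.dn.no/, its url id becomes "httpwwwdnno" (see get_id below) and
# munin-node-configure --suggest --shell will propose symlinks such as:
#  ln -s /usr/share/munin/plugins/http_load_ /etc/munin/plugins/http_load_httpwwwdnno_loadtime
#  ln -s /usr/share/munin/plugins/http_load_ /etc/munin/plugins/http_load_httpwwwdnno_size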
#
# Todo:
#   * Add support for forking to simulate real browsers
#   * Use checksums as fieldnames
#
# $Id: $
#
# Magic markers:
#%# family=auto
#%# capabilities=autoconf suggest

use strict;
use Time::HiRes qw( gettimeofday tv_interval );
use LWP::UserAgent;
use HTML::LinkExtor;
use LWP::ConnCache;

my $url_file="/etc/munin/http_load_urls.txt";
my $cachedir="/var/lib/munin/plugin-state";

my $debug=0;
my $timeout=10;
my $max_redirects=10;
my $scriptname="http_load_";
my $category="network"; # The munin graph category
my $useragent="Mozilla/5.0";

# Function to read the $url_file and return the contents in a hash
sub read_urls{
	my $file=$_[0];
	my %urls=();
	if(-r $file){
		open(FILE,'<'.$file);
		while (<FILE>) {
			my $url=$_;
			chomp($url);
			my $id=get_id($url);
			if(length($id)>0){
				$urls{$id}=$url;
			}
		}
		close (FILE);
	}
	return %urls;
}

# Function to read cache, return a hash
sub read_cache{
	my $file=$_[0];
	my %cache=();
	if(-r $file){
		open(FILE,'<'.$file);
		while (<FILE>) {
			m/^(\S*)\s+(.*)$/;
			$cache{ $1 } = $2;
		}
		close (FILE);
	}
	return %cache;
}
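
# The cache file written by the "cron" mode below is a plain "key value" list,
# one pair per line. Illustrative example only (the keys follow the cron code
# below; the values here are invented):
#   url http://www.dn.no/
#   timestamp 1199269292
#   loadtime_www.dn.no 0.831942
#   size_www.dn.no 123456
#   elements_www.dn.no 17
#   response_www.dn.no_200 17
#   type_text/html 1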

# Function to filter the HTML tags; decides which linked files to download
sub filter{
	my $tag=$_[0];
	my $status=1;

	# Some example data:
	# link href http://www.intrafish.no/template/include/css/intrafish.css
	# script src http://www.intrafish.no/template/include/js/intrafish.js
	# a href http://adserver.adtech.de/?adlink%7C2.0%7C405%7C119488%7C1%7C16%7CADTECH;grp=8491;loc=300;
	# img src http://adserver.adtech.de/?adserv%7C2.0%7C405%7C119488%7C1%7C16%7CADTECH;grp=8491;
	# area href http://go.vg.no/cgi-bin/go.cgi/sol/http://www.sol.no/sgo/vg/http://www.sol.no/underholdning/humor/?partnerid=vg

	# status=1 => do download (default)
	# status=0 => do not download

	if("$tag" eq "form action"){
		$status=0;
	}
	if("$tag" eq "a href"){
		$status=0;
	}
	if("$tag" eq "area href"){
		$status=0;
	}
	return $status;
}

# Return the cache file name for this plugin
sub get_cache_file_name{
	my $scriptname=$_[0];
	my $id=$_[1];
	my $type=$_[2];
	my $file="";

	$file = $scriptname . $id . ".cache";
	$debug && print "Cache file: " . $file . "\n";

	return $file;
}

# Get fieldname (making sure it is munin "compatible" as a fieldname):
# 1. Remove all non-word characters from the string
# 2. Make sure it is at most 19 characters long
sub get_fieldname{
	my $url=$_[0];
	$url =~ s/\W//g;
	if(length($url) > 19){
		$url =  substr($url, 0, 19);
	}
	return $url;
}

# Same as get_fieldname except it doesn't substr
sub get_id{
	my $url=$_[0];
	$url =~ s/[\W_]//g;
	return $url;
}
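
# Illustrative examples (not executed by the plugin): get_id("http://www.dn.no/")
# returns "httpwwwdnno", while get_fieldname would additionally truncate a long
# result to its first 19 characters so it stays a valid munin fieldname.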

$debug && print "Scriptname: " . $scriptname . "\n";

# Get the url id and the type of the graph
#
# The plugin (symlink) filename format is http_load_X_Y where
# X: The url id (the url with all non-word characters removed, see get_id)
# Y: The type of graph (elements, size, loadtime, ..)

my ($id,$type);
$0 =~ /http_load(?:_([^_]+)|)_(.+)\s*$/;
$id  = $1;
$type = $2;
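# For example (illustrative), a symlink named http_load_httpwwwdnno_loadtime
# yields $id = "httpwwwdnno" and $type = "loadtime".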

$debug && print "Id: $id, Type: $type\n";

if($ARGV[0] and $ARGV[0] eq "autoconf") {
	my %urls=&read_urls($url_file);
	if(keys(%urls) gt 0){
		print "yes\n";
		exit(0);
	} else {
		print "no\n";
		exit(1);
	}

} elsif($ARGV[0] and $ARGV[0] eq "suggest") {
	# get the url list, print suggestions for usage
	my %urls=&read_urls($url_file);
	while ( my ($id, $url) = each(%urls) ) {
		$debug && print "id: $id => url: $url\n";
		print $id . "_size\n";
		print $id . "_loadtime\n";
		print $id . "_response\n";
		print $id . "_tags\n";
		print $id . "_type\n";
		print $id . "_elements\n";
	}
	exit(0);

} elsif($ARGV[0] and $ARGV[0] eq "cron") {
	# This mode is run by cron and writes a cache file for munin-node to
	# read from

	my $verbose=0;
	if($ARGV[1] and $ARGV[1] eq "verbose") {
		$verbose=1;
		print "Verbose output\n";
	}

	my %urls=&read_urls($url_file);
	my %output;
	my %res;
	my $t0;
	my ($request,$response,$status,$link,$contents,$page_parser,$cachefile);

	while ( my ($id, $url) = each(%urls) ) {
		$verbose && print "Fetching $url (id: $id)... \n";

		$t0=0;
		$status=0;
		%output=();
		my $host="";
		if($url =~ m/\w+\:\/\/([^\/]+).*/){
			$host=$1;
			$verbose && print " Host: $host\n";
		}

		$output{"url"}=$url;
		$output{"timestamp"}=time();
		$verbose && print " Timestamp: " . $output{"timestamp"} . "\n";

		my $browser = LWP::UserAgent->new();

		$browser->agent($useragent);
		$browser->timeout(${timeout});
		$browser->max_redirect( $max_redirects );
		$browser->conn_cache(LWP::ConnCache->new());

		# Start the timer before the request so the measured loadtime
		# includes the download of the main page itself
		$t0 = [gettimeofday];
		$response = $browser->get($url);

		if ($response->is_success()) {
			$status=1;
			$output{"elements_" . $host}+=1;
		}

		$contents = $response->content();
		$output{"loadtime_" . $host} += sprintf("%.6f",tv_interval ( $t0, [gettimeofday]));
		$output{"size_" . $host}+=length($contents);
		$output{"response_" . $host . "_" . $response->code}+=1;
		$output{"type_" . $response->content_type}+=1;

		$page_parser = HTML::LinkExtor->new(undef, $url);
		$page_parser->parse($contents)->eof;
		my @links = $page_parser->links;
		$verbose && print " Processing links:\n";

		%res=();
		foreach $link (@links){
			my $tag=$$link[0] . " " . $$link[1];

			$output{"tags_" . $$link[0] . "-" . $$link[1]}+=1;

			if(filter($tag)){
				$verbose && print "  Processing: " . $$link[0] . " " . $$link[1] . " " . $$link[2] . "\n";

				# Extract the hostname and add it to the hash
				if($$link[2] =~ m/http\:\/\/([^\/]+).*/){
					$host=$1;
					$output{"elements_" . $host}+=1;
				}

				my $suburl=$$link[2];

				$t0 = [gettimeofday];
				$response = $browser->get($suburl);
				$output{"loadtime_" . $host} += sprintf("%.6f",tv_interval ( $t0, [gettimeofday]));

				$contents = $response->content();
				$output{"size_" . $host}+=length($contents);
				$output{"response_" . $host . "_" . $response->code}+=1;
				$output{"type_" . $response->content_type}+=1;

				$verbose && print "              Response: " . $response->code . " Size: " . length($contents) . "\n";
			} else {
				$verbose && print "  Skipping:   " . $$link[0] . " " . $$link[1] . " " . $$link[2] . "\n";
			}
		}

		$cachefile=$cachedir . "/" . &get_cache_file_name($scriptname,$id,$type);
		$debug && print "Reading cache file: " . $cachefile . "... ";

		my %input=read_cache($cachefile);

		$debug && print "done\n";

		# Reset all previously cached values to U (unknown) before adding
		# the values from this run
		while ( my ($id, $value) = each(%input) ) {
			$input{$id}="U";
		}

		# Adding new values
		while ( my ($id, $value) = each(%output) ) {
			$input{$id}=$value;
			$verbose && print " Result: " . $id . " -> " . $value . "\n";
		}

		# Writing the cache
		$verbose && print "Writing cache file: " . $cachefile . "... ";
		open(FILE,">".$cachefile);
		while ( my ($id, $value) = each(%input) ) {
			print FILE $id . " " . $value . "\n";
		}
		close(FILE);
		$verbose && print "done\n";
	}
	exit(0);
}elsif($ARGV[0] and $ARGV[0] eq "config") {
	my %urls=&read_urls($url_file);

	print "graph_title $urls{$id} ${type}\n";
	print "graph_args -l 0 --base 1000\n";
	print "graph_category " . $category . "\n";
	$debug && print "Reading cache file\n";
	my $cachefile=$cachedir . "/" . &get_cache_file_name($scriptname,$id,$type);
	my %cache=read_cache($cachefile);

	my $count=0;
	$debug && print "The cache file contains " . keys(%cache) . " lines\n";

	if($type eq "size"){
		print "graph_vlabel Bytes\n";
		print "graph_total Total\n";
		print "graph_info This graph is generated by a set of serial GETs to calculate the total size of $urls{$id}.\n";

		if(keys(%cache)>0){
			for my $key ( sort reverse keys %cache ){
				my $value=$cache{$key};

				if($key =~ m/^size_(\S+)$/){
					my $host=$1;
					my $value=$value;

					my $name=$1;
					$name=get_fieldname($name);

					print "$name.label from $host\n";
					print "$name.min 0\n";
					print "$name.max 20000000\n";
					if($count eq 0){
						print "$name.draw AREA\n";
					} else {
						print "$name.draw STACK\n";
					}
					$count+=1;
				}
			}
		}
	}elsif($type eq "loadtime"){
		print "graph_vlabel Seconds\n";
		print "graph_total Total\n";
		print "graph_info This graph is generated by a set of serial GETs to calculate the total time to load $urls{$id}. ";
		print "Note that browsers usually fork() the GET requests, resulting in a shorter total loading time.\n";

		if(keys(%cache)>0){
			for my $key ( sort reverse keys %cache ){
				my $value=$cache{$key};

				if($key =~ m/^loadtime_(\S+)$/){
					my $host=$1;
					my $value=$value;

					my $name=$1;
					$name=get_fieldname($name);

					print "$name.label from $host\n";
					print "$name.min 0\n";
					print "$name.max 400\n";
					if($count eq 0){
						print "$name.draw AREA\n";
					} else {
						print "$name.draw STACK\n";
					}
					$count+=1;
				}
			}
		}

	}elsif($type eq "elements"){
		print "graph_vlabel Number of elements\n";
		print "graph_total Total\n";
		print "graph_info This graph is generated by a set of serial GETs to count the number of elements (images, CSS files, etc) from $urls{$id}.\n";

		if(keys(%cache)>0){
			for my $key ( sort reverse keys %cache ){
				my $value=$cache{$key};

				if($key =~ m/^elements_(\S+)$/){
					my $host=$1;
					my $value=$value;

					my $name=$1;
					$name=get_fieldname($name);

					print "$name.label from $host\n";
					print "$name.min 0\n";
					print "$name.max 10000\n";
					if($count eq 0){
						print "$name.draw AREA\n";
					} else {
						print "$name.draw STACK\n";
					}
					$count+=1;
				}
			}
		}
	}elsif($type eq "response"){
		print "graph_vlabel Server response code count\n";
		print "graph_total Total\n";
		print "graph_info This graph is generated by a set of serial GETs to visualize the server response codes received while loading $urls{$id}.\n";

		if(keys(%cache)>0){
			for my $key ( sort reverse keys %cache ){
				my $value=$cache{$key};

				if($key =~ m/^response_(\S+)$/){
					my $host=$1;
					my $value=$value;

					my $name=$1;
					$name=get_fieldname($name);

					$host =~ s/\_/ /g;
					$host =~ s/(\S+)\s(\d+)/ /g;
					$host=$1;
					my $code=$2;

					print "$name.label $host ($code)\n";
					print "$name.min 0\n";
					print "$name.max 10000\n";
					if($count eq 0){
						print "$name.draw AREA\n";
					} else {
						print "$name.draw STACK\n";
					}
					$count+=1;
				}
			}
		}
	}elsif($type eq "type"){
		print "graph_vlabel Content type count\n";
		print "graph_total Total\n";
		print "graph_info This graph is generated by a set of serial GETs to visualize the different content types $urls{$id} consists of.\n";

		if(keys(%cache)>0){
			for my $key ( sort reverse keys %cache ){
				my $value=$cache{$key};

				if($key =~ m/^type_(\S+)$/){
					my $type=$1;
					my $value=$value;

					my $name=$1;
					$name=get_fieldname($name);

					#$host =~ s/\_/ /g;
					#$host =~ s/(\S+)\s(\S+)/ /g;
					#$host=$1;
					#my $type=$2;

					print "$name.label $type\n";
					print "$name.min 0\n";
					print "$name.max 100000\n";
					if($count eq 0){
						print "$name.draw AREA\n";
					} else {
						print "$name.draw STACK\n";
					}
					$count+=1;
				}
			}
		}
	}elsif($type eq "tags"){
		print "graph_vlabel HTML tag count\n";
		print "graph_total Total\n";
		print "graph_info This graph is generated by a set of serial GETs to visualize the different tags $urls{$id} consists of.\n";

		if(keys(%cache)>0){
			for my $key ( sort reverse keys %cache ){
				my $value=$cache{$key};

				if($key =~ m/^tags_(\S+)$/){
					my $host=$1;
					my $value=$value;

					my $name=$1;
					$name=get_fieldname($name);

					$host =~ s/\W/ /g;

					print "$name.label $host\n";
					print "$name.min 0\n";
					print "$name.max 100000\n";
					if($count eq 0){
						print "$name.draw AREA\n";
					} else {
						print "$name.draw STACK\n";
					}
					$count+=1;
				}
			}
		}
	}
	exit(0); 
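
	# Illustrative "config" output for a loadtime graph (field name and host
	# are examples, not real data; the graph_info line is omitted here):
	#   graph_title http://www.dn.no/ loadtime
	#   graph_args -l 0 --base 1000
	#   graph_category network
	#   graph_vlabel Seconds
	#   graph_total Total
	#   wwwdnno.label from www.dn.no
	#   wwwdnno.min 0
	#   wwwdnno.max 400
	#   wwwdnno.draw AREA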
} else {
	my $cachefile=$cachedir . "/" . &get_cache_file_name($scriptname,$id,$type);
	$debug && print "Reading cache file: " . $cachefile . "\n";
	my %cache=read_cache($cachefile);
	$debug && print "Number of lines in cache file: " . keys(%cache) . "\n";

	if(keys(%cache)>0){
		for my $key ( sort keys %cache ){
			my $value=$cache{$key};
			if($key =~ m/^([A-Za-z]+)\_(\S+)$/){
				my $name=$2;

				if ($1 eq $type){
					$name=get_fieldname($name);
					print $name . ".value " . $value . "\n";
				}
			} elsif(m/^(\S+)\s+(\S+)$/){
				if ($1 eq $type){
					print $1 . ".value " . $2 . "\n";
				}
			}
		}
	}
} 
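
# Illustrative "fetch" output for the loadtime graph above (the value is an
# invented example):
#   wwwdnno.value 0.831942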
# vim:syntax=perl