Projet

Général

Profil

Paste
Télécharger au format
Statistiques
| Branche: | Révision:

root / plugins / http / http_load_ @ ec0df071

Historique | Voir | Annoter | Télécharger (15,6 ko)

1 34c987ef Espen Braastad
#!/usr/bin/perl
2
# -*- perl -*-
3
#
4
# Plugin to graph http performance
5
# Version: 0.8.7
6
#
7
# The purpose of this plugin is to monitor several properties of a web page.
8
# All measurements are done for the complete web page, including images, css
9
# and other content a standard browser would download automatically.
10
#
11
# This version supports monitoring:
12
#   * The total time to download a complete web page (using serial GET requests)
13
#   * The total size of a web page
14
#   * The different response codes (200, 404, 500, etc)
15
#   * The different tags (img src, a href, etc)
16
#   * The different content types (image/png, text/css, etc)
17
#   * The number of elements the web page consists of
18
# 
19
# Author:  Espen Braastad / Linpro AS
20
#          espen@linpro.no
21
#
22
##### Short usage guide: #####
23
#
24
# Requirements:
25
#  * The server running this plugin must be allowed  to connect to the web 
26
#    server(s) you are going to monitor.
27
#  * Some perl modules: 
28
#    Time::HiRes, LWP::UserAgent, HTML::LinkExtor, LWP::ConnCache
29
#
30
# Initial configuration:
31
#  1. Copy this file to /usr/share/munin/plugins/
32
#
33
#  2. Create a file (/etc/munin/http_load_urls.txt) with one
34
#     full url per line, as many as you want, i.e.:
35
#      $ echo "http://www.dn.no/" >> /etc/munin/http_load_urls.txt
36
#      $ echo "http://www.intrafish.no/" >> /etc/munin/http_load_urls.txt
37
#
38
#  3. Add a cron job running the plugin with cron as the argument:
39
#     */15 * * * * <user> /usr/share/munin/plugins/http_load_ cron
40
#     <user> should be the user that has write permission to
41
#     the $cachedir directory set below. Set the intervals to
42
#     whatever you want.
43
#
44
#     For verbose output (for debugging) you can do:
45
#     sudo -u <user> /usr/share/munin/plugins/http_load_ cron verbose
46
#
47
#  4. Run munin-node-configure --suggest --shell and run the symlink
48
#     commands manually to update the munin-node plugin list.
49
#  
50
# (5. If you want to change the filter which the plugin uses to select which
51
#     tags to follow in a web page, edit the subroutine called "filter" below.)
52
#
53
# Add a new url to monitor:
54
#  1. Add a new line in /etc/munin/http_load_urls.txt with the full URL, i.e.:
55
#      $ echo "http://www.linpro.no/" >> /etc/munin/http_load_urls.txt
56
#
57
#  2. Run munin-node-configure --suggest --shell and manually
58
#     add the new symlink(s)
59
#
60
#  3. /etc/init.d/munin-node restart
61
#
62
# Remove a url from monitoring:
63
#  1. Remove it from /etc/munin/http_load_urls.txt
64
#
65
#  2. Remove ${cachedir}/http_load_<url_id>*
66
#
67
#  3. Remove /etc/munin/plugins/http_load_<url_id>*
68
#
69
#  4. /etc/init.d/munin-node restart
70
#
71
#####
72
#
73
# Todo:
74
#   * Add support for forking to simulate real browsers
75
#   * Use checksums as fieldnames
76
#
77
# $Id: $
78
#
79
# Magic markers:
80
#%# family=auto
81
#%# capabilities=autoconf suggest
82
83
use strict;
84
use Time::HiRes qw( gettimeofday tv_interval );
85
use LWP::UserAgent;
86
use HTML::LinkExtor;
87
use LWP::ConnCache;
88
89
# File listing the URLs to monitor, one full URL per line.
my $url_file = "/etc/munin/http_load_urls.txt";
# Directory in which the cron run stores one cache file per URL.
my $cachedir = "/var/lib/munin/plugin-state";

my $debug         = 0;              # True value => print debug output
my $timeout       = 10;             # Handed to LWP::UserAgent->timeout()
my $max_redirects = 10;             # Handed to LWP::UserAgent->max_redirect()
my $scriptname    = "http_load_";   # Prefix of the cache file names
my $category      = "network";      # The munin graph category
my $useragent     = "Mozilla/5.0";  # User-Agent string sent with the requests
98
99
# Read the URL list and return it as a hash mapping get_id($url) => $url.
#
#   $_[0]  path of the URL file (one full URL per line)
#
# Lines for which get_id() yields an empty id (e.g. blank lines) are skipped.
# A file that is not readable, or that cannot be opened, yields an empty hash.
sub read_urls{
	my $file=$_[0];
	my %urls=();

	return %urls unless -r $file;

	# Three-argument open with a lexical handle: the file name can never be
	# taken as part of the mode, the global FILE handle and $_ are left
	# alone, and a failed open is noticed instead of reading from a closed
	# handle.
	open(my $fh, '<', $file) or return %urls;
	while (my $url = <$fh>) {
		chomp($url);
		my $id=get_id($url);
		if(length($id)>0){
			$urls{$id}=$url;
		}
	}
	close($fh);

	return %urls;
}
117
118
# Read a cache file and return its contents as a hash.
#
#   $_[0]  path of the cache file
#
# Every line is expected to look like "<key><whitespace><value>"; the key is
# the leading run of non-whitespace characters and the value is the rest of
# the line. Lines that do not have this shape are ignored. A file that is
# not readable, or that cannot be opened, yields an empty hash.
sub read_cache{
	my $file=$_[0];
	my %cache=();

	return %cache unless -r $file;

	open(my $fh, '<', $file) or return %cache;
	while (my $line = <$fh>) {
		# Only use the captures when the match succeeded; after a failed
		# match $1/$2 still hold whatever an earlier match left behind.
		if ($line =~ m/^(\S*)\s+(.*)$/) {
			$cache{ $1 } = $2;
		}
	}
	close($fh);

	return %cache;
}
132
133
# Decide whether the file referenced by an html tag should be downloaded.
#
#   $_[0]  "<tag> <attribute>" string, e.g. "img src" or "a href"
#
# Returns 1 (do download, the default) or 0 (do not download).
sub filter{
	my $tag=$_[0];

	# Some example data, as "<tag> <attribute> <url>":
	# link href http://www.intrafish.no/template/include/css/intrafish.css
	# script src http://www.intrafish.no/template/include/js/intrafish.js
	# a href http://adserver.adtech.de/?adlink%7C2.0%7C405%7C119488%7C1%7C16%7CADTECH;grp=8491;loc=300;
	# img src http://adserver.adtech.de/?adserv%7C2.0%7C405%7C119488%7C1%7C16%7CADTECH;grp=8491;
	# area href http://go.vg.no/cgi-bin/go.cgi/sol/http://www.sol.no/sgo/vg/http://www.sol.no/underholdning/humor/?partnerid=vg

	# Tag/attribute pairs that are never followed; everything else is.
	my %skipped = map { $_ => 1 } ("form action", "a href", "area href");

	return exists $skipped{"$tag"} ? 0 : 1;
}
159
160
# Build the name (without directory) of the cache file of one url.
#
#   $_[0]  script name prefix
#   $_[1]  url id
#   $_[2]  graph type; accepted but not used for the file name
#
# Returns "<scriptname><id>.cache".
sub get_cache_file_name{
	my ($scriptname, $id, $type) = @_;

	my $file = join("", $scriptname, $id, ".cache");
	$debug && print "Cache file: " . $file . "\n";

	return $file;
}
172
173 ec0df071 Olivier Mehani
# Get fieldname (making sure it is munin-1.0 "compatible" as a fieldname)
# 1. Remove all non-word characters from the string (underscores are word
#    characters and are kept)
# 2. Make sure it has maximum 19 characters
#    2.1 If it is longer and contains an underscore, truncate the part before
#        the last usable underscore (the host) and keep what follows it
#        (e.g., the HTTP response status)
#    2.2 If there is no underscore, or the part after it would not fit in 19
#        characters on its own, simply keep the first 19 characters
#
#   $_[0]  the string to turn into a fieldname
#
# Returns the fieldname.
sub get_fieldname{
	my $url=$_[0];
	$url =~ s/\W//g;

	return $url if length($url) <= 19;

	# Take the captures from the match's own return value. Reading $1/$2
	# after an unchecked substitution picked up the captures of whatever
	# match the caller had done last whenever the name had no underscore.
	my ($host, $info) = $url =~ m/(\S+)_(\S+)/;
	if (defined $info) {
		my $suffixlength = length($info) + 1;
		# A longer suffix would make the substr() length negative and
		# produce a name of more than 19 characters.
		if ($suffixlength <= 19) {
			return substr($host, 0, 19 - $suffixlength) . '_' . $info;
		}
	}

	return substr($url, 0, 19);
}
193
194
# Build the id of a url: the url with every non-word character and every
# underscore removed. Unlike get_fieldname the result is not shortened.
#
#   $_[0]  the url
#
# Returns the id.
sub get_id{
	(my $id = $_[0]) =~ s/[\W_]//g;
	return $id;
}
200
201
$debug && print "Scriptname: " . $scriptname . "\n";

# Get the url id and the type of the graph from the name this plugin was
# started as ($0).
#
# The filename format is http_load_X_Y where
# X: The id of the url (the part before "_size", "_loadtime", ... in the
#    names printed by "suggest")
# Y: The type of graph (elements, size, loadtime, ..)
#
# A part that is not present in the name is left undefined.
my ($id, $type) = ($0 =~ /http_load(?:_([^_]+)|)_(.+)\s*$/);

$debug && print "Id: $id, Type: $type\n";
215
216
# ---------------------------------------------------------------------------
# Main program.
#
# The first command line argument selects what is done:
#   autoconf  print "yes"/"no" depending on whether any url is configured
#   suggest   print the <id>_<type> suffixes this plugin can be linked as
#   cron      download every configured url and write its cache file
#   config    print the munin graph configuration for $id / $type
#   (other)   print the cached values for $id / $type
# ---------------------------------------------------------------------------

# Label of a "response" field.
#
#   $_[0]  the part of the cache key after "response_", i.e. "<host>_<code>"
#
# Returns "<host> (<code>)". Underscores are turned into spaces first and the
# last "<word> <digits>" pair found is used. If there is no such pair the
# whole (space separated) string is used as host and the code is left empty.
sub _response_label {
	my ($raw) = @_;
	(my $spaced = $raw) =~ s/\_/ /g;

	my ($host, $code) = ($spaced, "");
	while ($spaced =~ m/(\S+)\s(\d+)/g) {
		($host, $code) = ($1, $2);
	}
	return "$host ($code)";
}

# Print the munin field configuration for all cache entries of one graph.
#
#   $_[0]  reference to the cache hash (as returned by read_cache)
#   $_[1]  graph type; only keys of the form "<type>_<name>" are used
#   $_[2]  value printed as <field>.max
#   $_[3]  code reference turning <name> into the label of the field
#
# The first field printed is drawn as AREA, all following ones as STACK.
sub _print_field_config {
	my ($cache, $prefix, $max, $label_for) = @_;
	my $count = 0;

	# NOTE(review): "sort reverse keys" is kept exactly as it was. It most
	# likely parses as sort(reverse(keys ...)), i.e. plain ascending order,
	# not descending - confirm before changing it, since the order decides
	# which field is the AREA and how the graph is stacked.
	for my $key ( sort reverse keys %$cache ){
		next unless $key =~ m/^\Q$prefix\E_(\S+)$/;
		my $raw  = $1;
		my $name = get_fieldname($raw);

		print "$name.label " . $label_for->($raw) . "\n";
		print "$name.min 0\n";
		print "$name.max $max\n";
		print "$name.draw " . ($count == 0 ? "AREA" : "STACK") . "\n";
		$count+=1;
	}
	return;
}

# "autoconf": returns the exit code (0 when at least one url is configured).
sub _run_autoconf {
	my %urls=read_urls($url_file);
	if(keys(%urls) > 0){
		print "yes\n";
		return 0;
	}
	print "no\n";
	return 1;
}

# "suggest": print the suggested link suffixes for every configured url.
sub _run_suggest {
	my %urls=read_urls($url_file);
	for my $id (sort keys %urls) {
		$debug && print "id: $id => url: $urls{$id}\n";
		for my $graph (qw(size loadtime response tags type elements)) {
			print $id . "_" . $graph . "\n";
		}
	}
	return 0;
}

# "cron": download every configured url, together with the files the page
# refers to (see filter()), and write the collected numbers to the cache file
# munin-node reads from.
#
#   $_[0]  true to print progress information
sub _run_cron {
	my ($verbose) = @_;
	my %urls=read_urls($url_file);

	for my $id (sort keys %urls) {
		my $url = $urls{$id};
		$verbose && print "Fetching $url (id: $id)... \n";

		my %output=();
		my $host="";
		if($url =~ m/\w+\:\/\/([^\/]+).*/){
			$host=$1;
			$verbose && print " Host: $host\n";
		}

		$output{"url"}=$url;
		$output{"timestamp"}=time();
		$verbose && print " Timestamp: " . $output{"timestamp"} . "\n";

		my $browser = LWP::UserAgent->new();
		$browser->agent($useragent);
		$browser->timeout($timeout);
		$browser->max_redirect( $max_redirects );
		$browser->conn_cache(LWP::ConnCache->new());

		# The clock has to be started before the request is sent. It used
		# to be started after get() had returned, so the download time of
		# the page itself was never part of the load time.
		my $t0 = [gettimeofday];
		my $response = $browser->get($url);
		$output{"loadtime_" . $host} += sprintf("%.6f",tv_interval ( $t0, [gettimeofday]));

		if ($response->is_success()) {
			$output{"elements_" . $host}+=1;
		}

		my $contents = $response->content();
		$output{"size_" . $host}+=length($contents);
		$output{"response_" . $host . "_" . $response->code}+=1;
		$output{"type_" . $response->content_type}+=1;

		my $page_parser = HTML::LinkExtor->new(undef, $url);
		$page_parser->parse($contents)->eof;
		my @links = $page_parser->links;
		$verbose && print " Processing links:\n";

		foreach my $link (@links){
			# Only the first attribute/url pair of a tag is looked at.
			my ($element, $attr, $suburl) = @$link;

			$output{"tags_" . $element . "-" . $attr}+=1;

			if(!filter($element . " " . $attr)){
				$verbose && print "  Skipping:   " . $element . " " . $attr . " " . $suburl . "\n";
				next;
			}
			$verbose && print "  Processing: " . $element . " " . $attr . " " . $suburl . "\n";

			# Extract the hostname and add it to the hash. The pattern is
			# anchored and accepts https as well; otherwise https files
			# are counted for the host of the previous file. For any
			# other kind of url $host keeps its previous value.
			if($suburl =~ m{^https?://([^/]+)}i){
				$host=$1;
				$output{"elements_" . $host}+=1;
			}

			$t0 = [gettimeofday];
			$response = $browser->get($suburl);
			$output{"loadtime_" . $host} += sprintf("%.6f",tv_interval ( $t0, [gettimeofday]));

			$contents = $response->content();
			$output{"size_" . $host}+=length($contents);
			$output{"response_" . $host . "_" . $response->code}+=1;
			$output{"type_" . $response->content_type}+=1;

			$verbose && print "              Response: " . $response->code . " Size: " . length($contents) . "\n";
		}

		my $cachefile=$cachedir . "/" . get_cache_file_name($scriptname,$id,$type);
		$debug && print "Reading cache file: " . $cachefile . "... ";
		my %input=read_cache($cachefile);
		$debug && print "done\n";

		# Mark every field known from the previous run as unknown ("U"), so
		# that fields which did not show up this time keep existing.
		$input{$_}="U" for keys %input;

		# Adding new values
		for my $key (keys %output) {
			$input{$key}=$output{$key};
			$verbose && print " Result: " . $key . " -> " . $output{$key} . "\n";
		}

		# Writing the cache
		$verbose && print "Writing cache file: " . $cachefile . "... ";
		my $fh;
		unless (open($fh, '>', $cachefile)) {
			# Report the problem and go on with the next url instead of
			# silently printing to a handle that was never opened.
			warn "Cannot write cache file $cachefile: $!\n";
			next;
		}
		for my $key (sort keys %input) {
			print $fh $key . " " . $input{$key} . "\n";
		}
		close($fh) or warn "Cannot close cache file $cachefile: $!\n";
		$verbose && print "done\n";
	}
	return 0;
}

# "config": print the munin graph configuration for the url $id and the
# graph $type. The list of fields is taken from the cache file.
sub _run_config {
	my %urls=read_urls($url_file);

	print "graph_title $urls{$id} ${type}\n";
	print "graph_args -l 0 --base 1000\n";
	print "graph_category " . $category . "\n";
	$debug && print "Reading cache file\n";
	my $cachefile=$cachedir . "/" . get_cache_file_name($scriptname,$id,$type);
	my %cache=read_cache($cachefile);

	$debug && print "The cache file contains " . keys(%cache) . " lines\n";

	my $from_host = sub { return "from " . $_[0] };
	my $as_is     = sub { return $_[0] };
	my $spaced    = sub { (my $label = $_[0]) =~ s/\W/ /g; return $label };

	# What differs between the graph types: axis label, description,
	# upper limit of a field and the way a field is labelled.
	my %graph = (
		"size" => {
			vlabel => "Bytes",
			info   => "This graph is generated by a set of serial GETs to calculate the total size of $urls{$id}.",
			max    => "20000000",
			label  => $from_host,
		},
		"loadtime" => {
			vlabel => "Seconds",
			info   => "This graph is generated by a set of serial GETs to calculate the total time to load $urls{$id}. "
				. "Note that browsers usually fork() the GET requests, resulting in a shorter total loading time.",
			max    => "400",
			label  => $from_host,
		},
		"elements" => {
			vlabel => "Number of elements",
			info   => "This graph is generated by a set of serial GETs to count the number of elements (images, CSS files, etc) from $urls{$id}.",
			max    => "10000",
			label  => $from_host,
		},
		"response" => {
			vlabel => "Server response code count",
			info   => "This graph is generated by a set of serial GETs to visualize the server response codes received while loading $urls{$id}.",
			max    => "10000",
			label  => \&_response_label,
		},
		"type" => {
			vlabel => "Content type count",
			info   => "This graph is generated by a set of serial GETs to visualize the different content types $urls{$id} consists of.",
			max    => "100000",
			label  => $as_is,
		},
		"tags" => {
			vlabel => "HTML tag count",
			info   => "This graph is generated by a set of serial GETs to visualize the different tags $urls{$id} consists of.",
			max    => "100000",
			label  => $spaced,
		},
	);

	# An unknown (or missing) type only gets the three general lines above.
	my $spec = defined $type ? $graph{$type} : undef;
	if ($spec) {
		print "graph_vlabel " . $spec->{vlabel} . "\n";
		print "graph_total Total\n";
		print "graph_info " . $spec->{info} . "\n";
		_print_field_config(\%cache, $type, $spec->{max}, $spec->{label});
	}
	return 0;
}

# Default action: print "<field>.value <value>" for every cache entry that
# belongs to the graph $type.
sub _run_fetch {
	my $cachefile=$cachedir . "/" . get_cache_file_name($scriptname,$id,$type);
	$debug && print "Reading cache file: " . $cachefile . "\n";
	my %cache=read_cache($cachefile);
	$debug && print "Number of lines in cache file: " . keys(%cache) . "\n";

	# Keys that are not of the form "<type>_<name>" (such as "url" and
	# "timestamp") are not values of any graph. The former fallback for
	# them matched against $_ instead of the key and could never print
	# anything, so it is gone.
	for my $key ( sort keys %cache ){
		next unless $key =~ m/^([A-Za-z]+)\_(\S+)$/;
		my ($kind, $name) = ($1, $2);
		next unless defined $type and $kind eq $type;
		print get_fieldname($name) . ".value " . $cache{$key} . "\n";
	}
	return 0;
}

my $mode = defined $ARGV[0] ? $ARGV[0] : "";

if($mode eq "autoconf") {
	exit(_run_autoconf());

} elsif($mode eq "suggest") {
	# get the url list, print suggestions for usage
	exit(_run_suggest());

} elsif($mode eq "cron") {
	# This thing is run by cron and should write a cache file for munin-node to
	# read from
	my $verbose=0;
	if($ARGV[1] and $ARGV[1] eq "verbose") {
		$verbose=1;
		print "Verbose output\n";
	}
	exit(_run_cron($verbose));

} elsif($mode eq "config") {
	exit(_run_config());

} else {
	_run_fetch();
}
580
581
# vim:syntax=perl