/plugins/http/http_load_ - Munin contributed plugins - Koumbit's Redmine

Télécharger au format

root / plugins / http / http_load_ @ 4b2fcbf8

Historique | Voir | Annoter | Télécharger (15,6 ko)

       #!/usr/bin/perl
       # -*- perl -*-
+      #
       # Plugin to graph http performance
       # Version: 0.8.7
+      #
       # The purpose of this plugin is to monitor several properties of a web page.
       # All measurements are done for the complete web page, including images, css
       # and other content a standard browser would download automatically.
+      #
       # This version supports monitoring:
       #   * The total time to download a complete web page (using serial GET requests)
       #   * The total size of a web page
       #   * The different response codes (200, 404, 500, etc)
       #   * The different tags (img src, a href, etc)
       #   * The the different content types (image/png, text/css/, etc)
       #   * The number of elements the web page consists of
+      #
       # Author:  Espen Braastad / Linpro AS
       #          espen@linpro.no
+      #
       ##### Short usage guide: #####
+      #
       # Requirements:
       #  * The server running this plugin must be allowed  to connect to the web
       #    server(s) you are going to monitor.
       #  * Some perl modules:
       #    Time::HiRes, LWP::UserAgent, HTML::LinkExtor, LWP::ConnCache
+      #
       # Initial configuration:
       #  1. Copy this file to /usr/share/munin/plugins/
+      #
       #  2. Create a file (/etc/munin/http_load_urls.txt) with one
       #     full url per line, as many as you want, i.e.:
       #      $ echo "http://www.dn.no/" >> /etc/munin/urls.txt
       #      $ echo "http://www.intrafish.no/" >> /etc/munin/urls.txt
+      #
       #  3. Add a cron job running the plugin with cron as the argument:
       #     */15 * * * * <user> /usr/share/munin/plugins/http_load_ cron
       #     <user> should be the user that has write permission to
       #     the $cachedir directory set below. Set the intervals to
       #     whatever you want.
+      #
       #     For verbose output (for debugging) you can do:
       #     sudo -u <user> /usr/share/munin/plugins/http_load_ cron verbose
+      #
       #  4. Run munin-node-configure --suggest --shell and run the symlink
       #     commands manually to update the munin-node plugin list.
+      #
       # (5. If you want to change the filter which the plugin uses to select which
       #     tags to follow in a web page, edit the subroutine called "filter" below.)
+      #
       # Add a new url to monitor:
       #  1. Add a new line in /etc/munin/urls.txt with the full URL, i.e.:
       #      $ echo "http://www.linpro.no/" >> /etc/munin/http_load_urls.txt
+      #
       #  2. Run munin-node-configure --suggest --shell and manually
       #     add the new symlink(s)
+      #
       #  3. /etc/init.d/munin-node restart
+      #
       # Remove a url from monitoring:
       #  1. Remove it from /etc/munin/http_load_urls.txt
+      #
       #  2. Remove ${cachedir}/http_load_<url_id>*
+      #
       #  3. Remove /etc/munin/plugins/http_load_<url_id>*
+      #
       #  4. /etc/init.d/munin-node restart
+      #
       #####
+      #
       # Todo:
       #   * Add support for forking to simulate real browsers
       #   * Use checksums as fieldnames
+      #
       # $Id: $
+      #
       # Magic markers:
       #%# family=auto
       #%# capabilities=autoconf suggest
       use strict;
       use Time::HiRes qw( gettimeofday tv_interval );
       use LWP::UserAgent;
       use HTML::LinkExtor;
       use LWP::ConnCache;
       my $url_file="/etc/munin/http_load_urls.txt";
       my $cachedir=$ENV{MUNIN_PLUGSTATE};
       my $debug=0;
       my $timeout=10;
       my $max_redirects=10;
       my $scriptname="http_load_";
       my $useragent="Mozilla/5.0";
       # Function to read the $url_file and return the contents in a hash
       sub read_urls{
       	my $file=$_[0];
       	my %urls=();
       	if(-r $file){
       		open(FILE,'<'.$file);
       		while (<FILE>) {
       			my $url=$_;
       			chomp($url);
       			my $id=get_id($url);
       			if(length($id)>0){
       				$urls{$id}=$url;
+      			}
+      		}
       		close (FILE);
+      	}
       	return %urls;
+      }
       # Function to read cache, return a hash
       sub read_cache{
       	my $file=$_[0];
       	my %cache=();
       	if(-r $file){
       		open(FILE,'<'.$file);
       		while (<FILE>) {
       			m/^(\S*)\s+(.*)$/;
       			$cache{ $1 } = $2;
+      		}
       		close (FILE);
+      	}
       	return %cache;
+      }
       # Function to filter the html tags, which files do we want to download
       sub filter{
       	my $tag=$_[0];
       	my $status=1;
       	# Some example data:
       	# link href http://www.intrafish.no/template/include/css/intrafish.css
       	# script src http://www.intrafish.no/template/include/js/intrafish.js
       	# a href http://adserver.adtech.de/?adlink%7C2.0%7C405%7C119488%7C1%7C16%7CADTECH;grp=8491;loc=300;
       	# img src http://adserver.adtech.de/?adserv%7C2.0%7C405%7C119488%7C1%7C16%7CADTECH;grp=8491;
       	# area href http://go.vg.no/cgi-bin/go.cgi/sol/http://www.sol.no/sgo/vg/http://www.sol.no/underholdning/humor/?partnerid=vg
       	# status=1 => do download (default)
       	# status=0 => do not download
       	if("$tag" eq "form action"){
       		$status=0;
+      	}
       	if("$tag" eq "a href"){
       		$status=0;
+      	}
       	if("$tag" eq "area href"){
       		$status=0;
+      	}
       	return $status;
+      }
       # Return the cache file name for this plugin
       sub get_cache_file_name{
       	my $scriptname=$_[0];
       	my $id=$_[1];
       	my $type=$_[2];
       	my $file="";
       	$file = $scriptname . $id . ".cache";
       	$debug && print "Cache file: " . $file . "\n";
       	return $file;
+      }
       # Get fieldname (making sure it is munin-1.0 "compatible" as a fieldname)
       # 1. Remove all non-word characters from a string)
       # 2. Make sure it has maximum 19 characters
       #    2.1 If not, truncate the host part, while keeping anything after an underscore (e.g., HTTP response status)
       sub get_fieldname{
       	my $url=$_[0];
       	$url =~ s/\W//g;
       	if(length($url) > 19){
       		$url =~ s/(\S+)_(\S+)/ /g;
       		my $host = $1;
       		my $info = $2;
       		my $suffixlength = length($info) + 1;
       		if ($suffixlength > 1) {
       			$url = substr($host, 0, 19 - $suffixlength) . '_' . $info;
       		} else {
       			$url = substr($url, 0, 19);
+      		}
+      	}
       	return $url;
+      }
       # Same as get_fieldname except it doesn't substr
       sub get_id{
       	my $url=$_[0];
       	$url =~ s/[\W_]//g;
       	return $url;
+      }
       $debug && print "Scriptname: " . $scriptname . "\n";
       # Get the url id and the type of the graph
+      #
       # The filename format is http_load_X_Y where
       # X: The line number in urls.txt
       # Y: The type of graph (elements, size, loadtime, ..)
       my ($id,$type);
       $0 =~ /http_load(?:_([^_]+)|)_(.+)\s*$/;
       $id  = $1;
       $type = $2;
       $debug && print "Id: $id, Type: $type\n";
       if($ARGV[0] and $ARGV[0] eq "autoconf") {
       	my %urls=&read_urls($url_file);
       	if(keys(%urls) gt 0){
       		print "yes\n";
       		exit(0);
       	} else {
       		print "no\n";
       		exit(1);
+      	}
       } elsif($ARGV[0] and $ARGV[0] eq "suggest") {
       	# get the url list, print suggestions for usage
       	my %urls=&read_urls($url_file);
       	while ( my ($id, $url) = each(%urls) ) {
               	$debug && print "id: $id => url: $url\n";
               	print $id . "_size\n";
               	print $id . "_loadtime\n";
               	print $id . "_response\n";
               	print $id . "_tags\n";
               	print $id . "_type\n";
               	print $id . "_elements\n";
+          	}
       	exit(0);
       } elsif($ARGV[0] and $ARGV[0] eq "cron") {
       	# This thing is run by cron and should write a cache file for munin-node to
       	# read from
       	my $verbose=0;
       	if($ARGV[1] and $ARGV[1] eq "verbose") {
       		$verbose=1;
       		print "Verbose output\n";
+      	}
       	my %urls=&read_urls($url_file);
       	my %output;
       	my %res;
       	my $t0;
       	my ($request,$response,$status,$link,$contents,$page_parser,$cachefile);
       	while ( my ($id, $url) = each(%urls) ) {
               	$verbose && print "Fetching $url (id: $id)... \n";
       		$t0=0;
       		$status=0;
       		%output=();
       		my $host="";
       		if($url =~ m/\w+\:\/\/([^\/]+).*/){
       			$host=$1;
               		$verbose && print " Host: $host\n";
+      		}
       		$output{"url"}=$url;
       		$output{"timestamp"}=time();
               	$verbose && print " Timestamp: " . $output{"timestamp"} . "\n";
       	        my $browser = LWP::UserAgent->new();
       		$browser->agent($useragent);
       	        $browser->timeout(${timeout});
       		$browser->max_redirect( $max_redirects );
       		$browser->conn_cache(LWP::ConnCache->new());
       		$response = $browser->get($url);
       		# Calculating time from now:
       		$t0 = [gettimeofday];
       	        if ($response->is_success()) {
       	                $status=1;
       			$output{"elements_" . $host}+=1;
+      	        }
               	$contents = $response->content();
       	        $output{"loadtime_" . $host} += sprintf("%.6f",tv_interval ( $t0, [gettimeofday]));
               	$output{"size_" . $host}+=length($contents);
       		$output{"response_" . $host . "_" . $response->code}+=1;
       		$output{"type_" . $response->content_type}+=1;
       	        $page_parser = HTML::LinkExtor->new(undef, $url);
       	        $page_parser->parse($contents)->eof;
       	        my @links = $page_parser->links;
               	$verbose && print " Processing links:\n";
               	%res=();
       	        foreach $link (@links){
       			my $tag=$$link[0] . " " . $$link[1];
       			$output{"tags_" . $$link[0] . "-" . $$link[1]}+=1;
       			if(filter($tag)){
       				$verbose && print "  Processing: " . $$link[0] . " " . $$link[1] . " " . $$link[2] . "\n";
       				# Extract the hostname and add it to the hash
       				if($$link[2] =~ m/http\:\/\/([^\/]+).*/){
       					$host=$1;
       					$output{"elements_" . $host}+=1;
+      				}
                       	        my $suburl=$$link[2];
       				$t0 = [gettimeofday];
       				$response = $browser->get($suburl);
       	        		$output{"loadtime_" . $host} += sprintf("%.6f",tv_interval ( $t0, [gettimeofday]));
               			$contents = $response->content();
               			$output{"size_" . $host}+=length($contents);
       				$output{"response_" . $host . "_" . $response->code}+=1;
       				$output{"type_" . $response->content_type}+=1;
       				$verbose && print "              Response: " . $response->code . " Size: " . length($contents) . "\n";
       			} else {
       				$verbose && print "  Skipping:   " . $$link[0] . " " . $$link[1] . " " . $$link[2] . "\n";
+      			}
+      		}
       		$cachefile=$cachedir . "/" . &get_cache_file_name($scriptname,$id,$type);
       		$debug && print "Reading cache file: " . $cachefile . "... ";
       		my %input=read_cache($cachefile);
       		$debug && print "done\n";
       		# Resetting all values to 0 before adding new values
       		while ( my ($id, $value) = each(%input) ) {
       			$input{$id}="U";
+          		}
       		# Adding new values
       		while ( my ($id, $value) = each(%output) ) {
       			$input{$id}=$value;
               		$verbose && print " Result: " . $id . " -> " . $value . "\n";
+          		}
       		# Writing the cache
       		$verbose && print "Writing cache file: " . $cachefile . "... ";
       		open(FILE,">".$cachefile);
       		while ( my ($id, $value) = each(%input) ) {
       			print FILE $id . " " . $value . "\n";
+      		}
       		close(FILE);
       		$verbose && print "done\n";
+      	}
       	exit(0);
       }elsif($ARGV[0] and $ARGV[0] eq "config") {
       	my %urls=&read_urls($url_file);
               print "graph_title $urls{$id} ${type}\n";
               print "graph_args -l 0 --base 1000\n";
               print "graph_category webserver\n";
       	$debug && print "Reading cache file\n";
       	my $cachefile=$cachedir . "/" . &get_cache_file_name($scriptname,$id,$type);
       	my %cache=read_cache($cachefile);
       	my $count=0;
       	$debug && print "The cache file contains " . keys(%cache) . " lines\n";
       	if($type eq "size"){
                       print "graph_vlabel Bytes\n";
       		print "graph_total Total\n";
                	print "graph_info This graph is generated by a set of serial GETs to calculate the total size of $urls{$id}.\n";
       		if(keys(%cache)>0){
       			for my $key ( sort reverse keys %cache ){
       				my $value=$cache{$key};
       				if($key =~ m/^size_(\S+)$/){
       					my $host=$1;
       					my $value=$value;
       					my $name=$1;
       					$name=get_fieldname($name);
               	        		print "$name.label from $host\n";
               	        		print "$name.min 0\n";
               	        		print "$name.max 20000000\n";
       					if($count eq 0){
       						print "$name.draw AREA\n";
       					} else {
       						print "$name.draw STACK\n";
+      					}
       					$count+=1;
+      				}
+      			}
+      		}
       	}elsif($type eq "loadtime"){
                       print "graph_vlabel Seconds\n";
       		print "graph_total Total\n";
                	print "graph_info This graph is generated by a set of serial GETs to calculate the total time to load $urls{$id}. ";
       		print "Note that browsers usually fork() the GET requests, resulting in a shorter total loading time.\n";
       		if(keys(%cache)>0){
       			for my $key ( sort reverse keys %cache ){
       				my $value=$cache{$key};
       				if($key =~ m/^loadtime_(\S+)$/){
       					my $host=$1;
       					my $value=$value;
       					my $name=$1;
       					$name=get_fieldname($name);
               	        		print "$name.label from $host\n";
               	        		print "$name.min 0\n";
               	        		print "$name.max 400\n";
       					if($count eq 0){
       						print "$name.draw AREA\n";
       					} else {
       						print "$name.draw STACK\n";
+      					}
       					$count+=1;
+      				}
+      			}
+      		}
       	}elsif($type eq "elements"){
                       print "graph_vlabel Number of elements\n";
       		print "graph_total Total\n";
               	print "graph_info This graph is generated by a set of serial GETs to count the number of elements (images, CSS files, etc) from $urls{$id}.\n";
       		if(keys(%cache)>0){
       			for my $key ( sort reverse keys %cache ){
       				my $value=$cache{$key};
       				if($key =~ m/^elements_(\S+)$/){
       					my $host=$1;
       					my $value=$value;
       					my $name=$1;
       					$name=get_fieldname($name);
               	        		print "$name.label from $host\n";
               	        		print "$name.min 0\n";
               	        		print "$name.max 10000\n";
       					if($count eq 0){
       						print "$name.draw AREA\n";
       					} else {
       						print "$name.draw STACK\n";
+      					}
       					$count+=1;
+      				}
+      			}
+      		}
       	}elsif($type eq "response"){
                       print "graph_vlabel Server response code count\n";
       		print "graph_total Total\n";
                	print "graph_info This graph is generated by a set of serial GETs to visualize the server response codes received while loading $urls{$id}.\n";
       		if(keys(%cache)>0){
       			for my $key ( sort reverse keys %cache ){
       				my $value=$cache{$key};
       				if($key =~ m/^response_(\S+)$/){
       					my $host=$1;
       					my $value=$value;
       					my $name=$1;
       					$name=get_fieldname($name);
       					$host =~ s/\_/ /g;
       					$host =~ s/(\S+)\s(\d+)/ /g;
       					$host=$1;
       					my $code=$2;
               	        		print "$name.label $host ($code)\n";
               	        		print "$name.min 0\n";
               	        		print "$name.max 10000\n";
       					if($count eq 0){
       						print "$name.draw AREA\n";
       					} else {
       						print "$name.draw STACK\n";
+      					}
       					$count+=1;
+      				}
+      			}
+      		}
       	}elsif($type eq "type"){
                       print "graph_vlabel Content type count\n";
       		print "graph_total Total\n";
                	print "graph_info This graph is generated by a set of serial GETs to visualize the different content types $urls{$id} consists of.\n";
       		if(keys(%cache)>0){
       			for my $key ( sort reverse keys %cache ){
       				my $value=$cache{$key};
       				if($key =~ m/^type_(\S+)$/){
       					my $type=$1;
       					my $value=$value;
       					my $name=$1;
       					$name=get_fieldname($name);
       					#$host =~ s/\_/ /g;
       					#$host =~ s/(\S+)\s(\S+)/ /g;
       					#$host=$1;
       					#my $type=$2;
               	        		print "$name.label $type\n";
               	        		print "$name.min 0\n";
               	        		print "$name.max 100000\n";
       					if($count eq 0){
       						print "$name.draw AREA\n";
       					} else {
       						print "$name.draw STACK\n";
+      					}
       					$count+=1;
+      				}
+      			}
+      		}
       	}elsif($type eq "tags"){
                       print "graph_vlabel HTML tag count\n";
       		print "graph_total Total\n";
                	print "graph_info This graph is generated by a set of serial GETs to visualize the different tags $urls{$id} consists of.\n";
       		if(keys(%cache)>0){
       			for my $key ( sort reverse keys %cache ){
       				my $value=$cache{$key};
       				if($key =~ m/^tags_(\S+)$/){
       					my $host=$1;
       					my $value=$value;
       					my $name=$1;
       					$name=get_fieldname($name);
       					$host =~ s/\W/ /g;
               	        		print "$name.label $host\n";
               	        		print "$name.min 0\n";
               	        		print "$name.max 100000\n";
       					if($count eq 0){
       						print "$name.draw AREA\n";
       					} else {
       						print "$name.draw STACK\n";
+      					}
       					$count+=1;
+      				}
+      			}
+      		}
+      	}
       	exit(0);
       } else {
       	my $cachefile=$cachedir . "/" . &get_cache_file_name($scriptname,$id,$type);
       	$debug && print "Reading cache file: " . $cachefile . "\n";
       	my %cache=read_cache($cachefile);
       	$debug && print "Number of lines in cache file: " . keys(%cache) . "\n";
       	if(keys(%cache)>0){
       		for my $key ( sort keys %cache ){
       			my $value=$cache{$key};
       			if($key =~ m/^([A-Za-z]+)\_(\S+)$/){
       				my $name=$2;
       				if ($1 eq $type){
       					$name=get_fieldname($name);
       					print $name . ".value " . $value . "\n";
+      				}
       			} elsif(m/^(\S+)\s+(\S+)$/){
       				if ($1 eq $type){
       					print $1 . ".value " . $2 . "\n";
+      				}
+      			}
+      		}
+      	}
+      }
       # vim:syntax=perl

Projet

Général

Profil

Munin contributed plugins

root / plugins / http / http_load_ @ 4b2fcbf8