Revision 942bda31

ID: 942bda31651dd35f61839a1cdc702a67027c5279
Parent: 3754ecc4
Child: c7efedf6

Added by Kenyon Ralph over 13 years ago

extract apache_vhosts tarball

View differences:

plugins/apache/apache_vhosts/README.txt
apache_pipelogger: to be used in an apache CustomLog directive
apache_logparser: daemon to watch logs and store stats in shared mem
plugins/apache_vhosts: munin plugin to deliver stats
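
All three pieces communicate through a single IPC::ShareLite segment (key 'mapl') holding a Storable-frozen hash of per-vhost counters: the two loggers write into it, and the munin plugin reads and resets it. As a minimal debugging sketch (not part of this tarball, and assuming one of the loggers is already running and has stored data), the segment can be inspected like this:

  #!/usr/bin/perl
  # Debugging sketch only: dump the per-vhost stats hash that apache_pipelogger
  # and/or apache_logparser keep in shared memory under the key 'mapl'.
  use strict;
  use warnings;
  use IPC::ShareLite;
  use Storable qw(thaw);
  use Data::Dumper;

  # attach to the existing segment; do not create or destroy it
  my $share = IPC::ShareLite->new(
      -key     => 'mapl',
      -create  => 0,
      -destroy => 0,
  ) or die "no shared memory segment found: $!";

  my %stats = %{ thaw($share->fetch) };
  print Dumper(\%stats);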
plugins/apache/apache_vhosts/apache_logparser
#!/usr/bin/perl

=head1 README

This is a logfile watcher for Apache: it monitors a log directory for access logs and saves some stats to shared memory.
Munin can then fetch and reset the stats periodically.

Just start it once; it runs as a daemon and polls the logs every n seconds, keeping track of changes to them.
The file list is read on startup and at each scan_interval. File positions are recorded, and logs are checked for truncation/deletion (for performance reasons).

Requires the Perl modules File::Tail::Multi, Storable, IPC::ShareLite and Munin::Plugin (optionally Data::Dumper).

You can use it in parallel with the pipelogger if that suits you better; the stats are merged in shared memory.
Both approaches should show decent performance; the pipelogger works in RAM only, but writes no logs.


=head1 INSTALLATION

Install to /usr/share/munin and run it as root.

Configure the variables below:

$dir            path to your logfiles
$files          file glob to find access logs
$site           regexp to extract the sitename from the logfile name
$statefile      file to save the last log position for tail
$nsec           tail and write to shared mem every n seconds
$debug          dump tallied data every n seconds, print every log line parsed
$scan_interval  rescan for new log files every n minutes
$type           log file type:
                common: CLF + vhost + time + (other fields)
                combined: combined + time + (other fields)
=cut

# config
my $dir = "/logs/apache_logs";
my $files = "*access_log";
my $site = "(.*)-access_log";
my $statefile = "/tmp/logstate";
`touch $statefile` unless (-f $statefile);
local $type="combined";
local $nsec=7;
local $debug=0;

my $scan_interval=5; # minutes

# perl modules
use File::Tail::Multi;
use Storable qw(freeze thaw);
use List::Util qw(min max);
use IPC::ShareLite ':lock';
require Data::Dumper if $debug;
use Munin::Plugin;

# shared mem
local $share = IPC::ShareLite->new(
	-key     => 'mapl',
	-create  => 1,
	-destroy => 1,
	-exclusive => 0,
	-mode => '0666'
) or die $!;

# drop stored data on reload
$share->store( freeze {} );

# tail log files
my $tail=File::Tail::Multi->new (
  Files=>["$dir/$files"],
  ScanForFiles=>$scan_interval,
  Debug=>0,
  LastRun_File => $statefile,
  RemoveDuplicate=>0,
  NumLines=>0,
  OutputPrefix=>"f"
);

# read to current position
$tail->read;

# register counting function
$tail->Function(\&count);

local $temp;
my ($file,$ip,$logname,$user,$rtime,$method,$request,$protocol,$status,$bytes,$referer,$useragent,$time);
sub count {
	foreach $_ (@{shift()})  {
	if ((()=/"/g)==2) {
	  # common with filename prefix, optionally add time and vhost at the end
	  ($file,$ip,$logname,$user,$rtime,$method,$request,$protocol,$status,$bytes,$time,$vhost)=/^(.*?)\s:\s(.*?)\s(.*?)\s(.*?)\s\[(.*?)\]\s"(.*)\s(.*?)\s(.*?)"\s(\d*)\s(\S*)\s?(\S*)\s?(\S*?)$/o;
	}
	elsif ((()=/"/g)==6) {
	  # combined with filename prefix, optionally add time and vhost at the end
	  ($file,$ip,$logname,$user,$rtime,$method,$request,$protocol,$status,$bytes,$referer,$useragent,$time,$vhost)=/^(.*?)\s:\s(.*?)\s(.*?)\s(.*?)\s\[(.*?)\]\s"(.*)\s(.*?)\s(.*?)"\s(\d*?)\s(.*?)\s"(.*?)"\s"(.*?)"\s?(\S*)\s?(\S*)$/o;
	};

	# find sitename
	$file=~s/$site/$1/;
	$file=$vhost if $vhost;

	# skip broken lines
	next unless $file;

	# sitename to munin fieldname
	my $vpm=clean_fieldname("$file");
	$temp{$vpm}{'label'}="$file";
	$temp{$vpm}{'label'}=~s/www\.//;

	# count all requests
	$temp{$vpm}{'requests'}++;

	if ($bytes) {
	  $bytes=~s/-/0/;
	  # bytes transmitted
	  $temp{$vpm}{'bytes'}+=$bytes;

	  # max bytes
	  $temp{$vpm}{'max_bytes'}=max($temp{$vpm}{'max_bytes'},$bytes) || 0;

	  # average bytes
	  $temp{$vpm}{'avg_bytes'}=$temp{$vpm}{'bytes'}/$temp{$vpm}{'requests'} || 0;
	}

	# count by status / error code
	$temp{$vpm}{"status"}{$status}++ if $status;

	if ($time) {
	  # microsec to millisec
	  $time=sprintf("%d",$time/1000);

	  # min/max execution time
	  $temp{$vpm}{'max_time'}=max($temp{$vpm}{'max_time'},$time) || 0;

	  # cumulative execution time
	  $temp{$vpm}{'time'}+=$time;

	  # average time
	  $temp{$vpm}{'avg_time'}=$temp{$vpm}{'time'}/$temp{$vpm}{'requests'} || 0;
	}

	};
};


while (1) {
	# tail files, calls &count with linearray
	$tail->read;

	# begin transaction
	$share->lock(LOCK_EX);

	# get data (may be updated by other loggers too)
	my %old=%{thaw $share->fetch};

	foreach my $vpm (keys %temp){
		# merge values
		$old{$vpm}{'label'}=$temp{$vpm}{'label'};
		$old{$vpm}{'bytes'}+=$temp{$vpm}{'bytes'} if $temp{$vpm}{'bytes'};
		$old{$vpm}{'requests'}+=$temp{$vpm}{'requests'} if $temp{$vpm}{'requests'};
		$old{$vpm}{'time'}+=$temp{$vpm}{'time'} if $temp{$vpm}{'time'};
		# avoid div by zero
		my $div=($old{$vpm}{'requests'} <1)?1:$old{$vpm}{'requests'};
		# recalc average on merged data for multiple datasources, use local average after purge/restart
		$old{$vpm}{'avg_time'}=($old{$vpm}{'avg_time'}>0)?sprintf("%d",($old{$vpm}{'time'}+$temp{$vpm}{'time'})/$div):sprintf("%d",$temp{$vpm}{'avg_time'});
		$old{$vpm}{'avg_bytes'}=($old{$vpm}{'avg_bytes'}>0)?sprintf("%d",($old{$vpm}{'bytes'}+$temp{$vpm}{'bytes'})/$div):sprintf("%d",$temp{$vpm}{'avg_bytes'});
		$old{$vpm}{'max_time'}=max($old{$vpm}{'max_time'},$temp{$vpm}{'max_time'}) || 0;
		$old{$vpm}{'max_bytes'}=max($old{$vpm}{'max_bytes'},$temp{$vpm}{'max_bytes'}) || 0;

		# reset local counters
		foreach my $check (qw(requests bytes time max_bytes avg_bytes max_time avg_time)) {
			$temp{$vpm}{$check}=0;
		}

		# reset status counts
		foreach my $val (keys %{$temp{$vpm}{'status'}}) {
			$old{$vpm}{'status'}{$val}+=$temp{$vpm}{'status'}{$val};
			$temp{$vpm}{'status'}{$val}=0;
		}

	};

	# save to shm
	print Data::Dumper::Dumper(%old) if $debug;
	$share->store( freeze \%old );
	# end transaction
	$share->unlock;

	# parse/write every n seconds (plus processing time)
	sleep $nsec;
}
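
The parser's two branches expect each line to arrive prefixed with the originating file name (added by File::Tail::Multi's OutputPrefix) separated by " : ", followed by a common or combined log entry, optionally with a %D response time and a vhost appended. A small standalone illustration of the combined branch (not part of the tarball; the sample line and values are made up, and the "filename : " prefix form is an assumption taken from the regex above):

  #!/usr/bin/perl
  # Illustration only: run one synthetic "filename : combined log line" through
  # the same regex apache_logparser uses for its combined branch.
  use strict;
  use warnings;

  my $line = '/logs/apache_logs/www.example.com-access_log : 192.0.2.1 - - '
           . '[10/Oct/2011:13:55:36 +0000] "GET /index.html HTTP/1.1" 200 2326 '
           . '"http://www.example.com/start" "Mozilla/5.0" 1234';

  my ($file,$ip,$logname,$user,$rtime,$method,$request,$protocol,
      $status,$bytes,$referer,$useragent,$time,$vhost) =
    $line =~ /^(.*?)\s:\s(.*?)\s(.*?)\s(.*?)\s\[(.*?)\]\s"(.*)\s(.*?)\s(.*?)"\s(\d*?)\s(.*?)\s"(.*?)"\s"(.*?)"\s?(\S*)\s?(\S*)$/;

  print "file:      $file\n";    # the daemon reduces this to a sitename via $site
  print "status:    $status\n";  # 200
  print "bytes:     $bytes\n";   # 2326
  print "time (us): $time\n";    # 1234, later converted to milliseconds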
plugins/apache/apache_vhosts/apache_pipelogger
#!/usr/bin/perl

=head1 INSTALLATION

# Log vhost port method response_bytes response_time status
<IfModule mod_log_config.c>
  CustomLog "|/usr/share/munin/apache_pipelogger" "%v %p %m %B %D %s"
</IfModule>

=cut
# write every n seconds to shared memory
local $nsec=7;
local $debug=undef;

use Storable qw(freeze thaw);
use List::Util qw(min max);
use IPC::ShareLite ':lock';
require Data::Dumper if $debug;
use Munin::Plugin;


local $share = IPC::ShareLite->new(
	-key     => 'mapl',
	-create  => 1,
	-destroy => 1,
	-exclusive => 0,
	-mode => '0666'
) or die $!;


local $SIG{'ALRM'}=\&periodic_write;
alarm $nsec;


# drop stored data on reload
local %temp=();

while (<STDIN>) {
	my ($vhost,$port,$method,$bytes,$time,$status)=split(/\s/,$_);

	# sanity check: expect six whitespace-separated fields
	next unless m/^([\d\w.-_]+\s){5}([\d\w.-_]+$)/;

	# sitename to munin fieldname
	my $vpm=clean_fieldname($vhost);
	$temp{$vpm}{'label'}=$vhost;
	$temp{$vpm}{'label'}=~s/www\.//;

	# count all requests
	$temp{$vpm}{'requests'}++;

	if ($bytes) {
	  $bytes=~s/-/0/;
	  # bytes transmitted
	  $temp{$vpm}{'bytes'}+=$bytes;

	  # max bytes
	  $temp{$vpm}{'max_bytes'}=max($temp{$vpm}{'max_bytes'},$bytes);

	  # average bytes
	  $temp{$vpm}{'avg_bytes'}=$temp{$vpm}{'bytes'}/$temp{$vpm}{'requests'} || 0 if ($bytes);
	}

	# count by status / error code
	$temp{$vpm}{"status"}{$status}++ if $status;

	if ($time) {
	  # microsec to millisec
	  $time=sprintf("%d",$time/1000);

	  # min/max execution time
	  $temp{$vpm}{'max_time'}=max($temp{$vpm}{'max_time'},$time);

	  # cumulative execution time
	  $temp{$vpm}{'cml_time'}+=$time;

	  # average time
	  $temp{$vpm}{'avg_time'}=$temp{$vpm}{'cml_time'}/$temp{$vpm}{'requests'} || 0 if ($time);
	}
};

sub periodic_write {
	# begin transaction
	$share->lock(LOCK_EX);

	# get data (may be updated by other loggers too)
	my %old=%{thaw $share->fetch};

	foreach my $vpm (keys %temp){
		# merge values
		$old{$vpm}{'bytes'}+=$temp{$vpm}{'bytes'} if $temp{$vpm}{'bytes'};
		$old{$vpm}{'requests'}+=$temp{$vpm}{'requests'} if $temp{$vpm}{'requests'};
		$old{$vpm}{'time'}+=$temp{$vpm}{'time'} if $temp{$vpm}{'time'};
		$old{$vpm}{'label'}=$temp{$vpm}{'label'};
		$old{$vpm}{'avg_time'}=sprintf("%d",($old{$vpm}{'avg_time'}+$temp{$vpm}{'avg_time'})/2);
		$old{$vpm}{'max_time'}=max($old{$vpm}{'max_time'},$temp{$vpm}{'max_time'});
		$old{$vpm}{'max_bytes'}=max($old{$vpm}{'max_bytes'},$temp{$vpm}{'max_bytes'});
		$old{$vpm}{'avg_bytes'}=sprintf("%d",($old{$vpm}{'avg_bytes'}+$temp{$vpm}{'avg_bytes'})/2);

		# reset local counters
		foreach my $check (qw(requests bytes time cml_time max_bytes avg_bytes max_time avg_time)) {
			$temp{$vpm}{$check}=0;
		}

		# reset status counts
		foreach my $val (keys %{$temp{$vpm}{'status'}}) {
			$old{$vpm}{'status'}{$val}+=$temp{$vpm}{'status'}{$val};
			$temp{$vpm}{'status'}{$val}=0;
		}

	};

	# save to shm
#	print Data::Dumper::Dumper(%old) if $debug;
	$share->store( freeze \%old );

	# end transaction
	$share->unlock;

	# parse/write every n seconds
	alarm $nsec;
}
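
Each line the pipelogger reads from STDIN is whatever Apache emits for the "%v %p %m %B %D %s" format: vhost, port, method, response bytes, response time in microseconds, and status. A standalone illustration of the split and six-field sanity check (not part of the tarball; the input line and values are made up):

  #!/usr/bin/perl
  # Illustration only: one made-up "%v %p %m %B %D %s" line, run through the
  # same kind of split and six-field sanity check the pipelogger applies.
  use strict;
  use warnings;

  my $line = "www.example.com 443 GET 2326 15400 200";

  my ($vhost, $port, $method, $bytes, $time, $status) = split /\s/, $line;

  # six whitespace-separated tokens of word characters, dots and dashes
  if ($line =~ m/^([\w.\-]+\s){5}[\w.\-]+$/) {
      printf "vhost=%s status=%s bytes=%s time_ms=%d\n",
          $vhost, $status, $bytes, $time / 1000;   # %D is microseconds
  }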
plugins/apache/apache_vhosts/apache_vhosts
#!/usr/bin/perl

=head1 INSTALLATION

This plugin requires data from apache. You can get at the data in two ways:

1) Install the pipelogger (logs without using disk space, RAM only, highly performant)
  - Install /usr/share/munin/apache_pipelogger as executable for apache/wwwrun
  - Install the logger in httpd.conf:

    # Log vhost port method response_bytes response_time_ms httpd_status
    <IfModule mod_log_config.c>
      CustomLog "|/usr/share/munin/apache_pipelogger" "%v %p %m %B %D %s"
    </IfModule>

2) Install the log parser as a daemon (watches multiple access logs in a single folder for changes)
  - the log parser should run as root (it can simply be run in the background)
  - slightly less performant, but easier to apply to existing installations
  - if you want response time stats, you have to log them in apache:

    <IfModule mod_log_config.c>
      LogFormat "%h %l %u %t \"%r\" %>s %b \"%{Referer}i\" \"%{User-Agent}i\" %D" combined-time
    </IfModule>

  - configure the log parser to match your installation regarding naming and log folders

You can use both solutions simultaneously; the data will be merged.
Be aware that an apache CustomLog directive in the master config will only log those vhosts that have no CustomLog directive of their own.

Install the plugin conf (after [apache_*]):

[apache_vhosts]
user root
env.subgraphs requests bytes time
env.checks requests bytes time

# user      - probably necessary for shared memory IPC
# subgraphs - create multigraph subgraphs (watch your graphing performance...), default 0
# checks    - enable stats on bytes and response times per request, you have to log these in apache

A word on performance:
Requests/sec should not be much of a problem. Pipelogger and logparser should not have many performance problems, as they apply one regex per line and add some stats.
Stats are saved every n seconds (default: 7) to shared memory in serialized form. That should be OK on even the most loaded servers (unless you watch cache logs).
I would estimate that > 10k log lines/sec could start becoming a problem; you might have to start tuning there or use a dedicated system.
You might think about splitting the logs over multiple logparser scripts to parallelize and merge in larger intervals.

Graphing is another matter; it gets more expensive the more vhosts you have.
With subgraphs off, you get 3 main graphs * 4 timescales (day, week, month, year).
With subgraphs on, you get 2 checks * (1 + 6 * #vhosts) + 1 check * (1 + #vhosts * #statuscodes * 4) graphs; with 100 vhosts and 5 status codes that is already over 3,000 graphs.
With hundreds of vhosts that becomes a problem, as munin-update and munin-html do not scale well.

Timeouts are another matter: munin-update asks for the plugin data and works on the received lines while the network timeout is running.
So expect to set your timeouts to 120s with a hundred vhosts.

=head1 MAGIC MARKERS

  #%# family=auto
  #%# capabilities=autoconf

=head1 LICENSE

GPLv2

=cut

my %checks = map {$_=>1} ( ($ENV{'checks'}) ? split(/ /,$ENV{'checks'}) : qw(requests bytes time) );
my %subgraphs= map {$_=>1} ( ($ENV{'subgraphs'}) ? split(/ /,$ENV{'subgraphs'}) : () );

use strict;
#use warnings;
use Munin::Plugin;
use IPC::ShareLite ':lock';
use Storable qw(freeze thaw);

my $share = IPC::ShareLite->new(
	-key     => 'mapl',
	-create  => 0,
	-destroy => 0,
	-exclusive => 0,
	-mode => '0744'
) or die $!;


my %data=%{thaw $share->fetch};

if ( defined $ARGV[0] and $ARGV[0] eq "autoconf" ) {
  if (scalar(keys %data)>0) {
    print "yes\n";
    exit 0;
  } else {
    print "no data available, apache_pipelogger not installed\n";
    exit 0;
  }
}

need_multigraph();


my ($config,$values);


#
# config
#

if ( defined $ARGV[0] and $ARGV[0] eq "config" ) {
  foreach my $check (keys %checks) {
  next if ($check eq 'requests'); # requests are special
  my $order=join("_$check ",sort keys %data)."_$check";

#
# config: bytes / time + subgraphs
#

    print <<END;

multigraph apache_vhosts_$check
graph_title average $check on all active vhosts
graph_args --base 1000
graph_vlabel average $check per response
graph_category apache_vhosts
graph_period minute
graph_order $order
END

    foreach my $site (keys %data) {
      print <<END;
${site}_$check.label $data{$site}{'label'}
${site}_$check.info average $check per response on $data{$site}{'label'}
${site}_$check.draw LINE1
${site}_$check.type GAUGE
END
    } # end sites

    if ($subgraphs{$check}) {
      foreach my $site (keys %data) {
      print <<END;

multigraph apache_vhosts_$check.$site
graph_title average $check on $data{$site}{'label'}
graph_args --base 1000
graph_vlabel average response in $check
graph_category apache_vhosts
graph_period minute
END

        foreach my $graph ("avg","max") {
          print <<END;
${site}_${graph}_$check.label $graph$check
${site}_${graph}_$check.info $graph$check per response on $data{$site}{'label'}
${site}_${graph}_$check.draw LINE1
${site}_${graph}_$check.type GAUGE
END
        } # end graph
      } # end sites
    } # end subgraph
  } # end checks



#
# config: requests + subgraphs
#
my $order=join("_requests ",sort keys %data)."_requests";

  print <<END;

multigraph apache_vhosts_requests
graph_title requests by vhost
graph_args --base 1000
graph_vlabel requests / \${graph_period}
graph_category apache_vhosts
graph_period minute
graph_order $order
END
  foreach my $site (keys %data) {

    print <<END;
${site}_requests.label $data{$site}{'label'}
${site}_requests.info $site
${site}_requests.draw LINE1
${site}_requests.type GAUGE
END

  } # end site

  if ($subgraphs{'requests'}) {

#  multigraphs multivalue (status codes)
    foreach my $site (keys %data) {
      print <<END;

multigraph apache_vhosts_requests.$site
graph_title status codes on $data{$site}{'label'}
graph_args --base 1000
graph_vlabel status codes / \${graph_period}
graph_category apache_vhosts
graph_period minute
END
      my $draw='AREA';
      foreach my $status (sort keys %{$data{$site}{'status'}}) {
            print <<END;
${site}_s${status}.label status $status
${site}_s${status}.info status $status
${site}_s${status}.draw $draw
${site}_s${status}.type GAUGE
END
        $draw='STACK';
      } # end status
    } # end sites
  } # end multigraph

  exit 0;
} # end if config




#
# values: bytes / time + subgraphs
#

foreach my $check (keys %checks) {
  next if ($check eq 'requests'); # requests are special

  # main graphs values
  print "\nmultigraph apache_vhosts_$check\n";
  foreach my $site (keys %data) {
    $data{$site}{$check}||=0;
    print "${site}_$check.value $data{$site}{'avg_'.$check}\n";
  } # end sites

  if ($subgraphs{$check}) {
    # subgraph values
    foreach my $site (keys %data) {
      print "\nmultigraph apache_vhosts_$check.$site\n";
      foreach my $graph ("avg","max") {
        $data{$site}{$check}||=0;
        print "${site}_${graph}_$check.value ".$data{$site}{$graph."_".$check}."\n";
      } # end graph
    } # end sites
  } # end subgraph
} # end checks




#
# values: requests + subgraphs
#

print "\nmultigraph apache_vhosts_requests\n";
foreach my $site (keys %data) {
  $data{$site}{'requests'}||=0;
  print "${site}_requests.value $data{$site}{'requests'}\n";
} # end sites

if ($subgraphs{'requests'}) {
  # multigraphs multivalue (status codes)
  foreach my $site (keys %data) {
    print "\nmultigraph apache_vhosts_requests.$site\n";
    foreach my $status (sort keys %{$data{$site}{'status'}}) {
      $data{$site}{'status'}{$status}||=0;
      print "${site}_s${status}.value ".($data{$site}{'status'}{$status}||0)."\n";
    } # end status
  } # end sites
} # end subgraph




#
# clear data after poll
#

foreach my $site (keys %data) {
  foreach my $check ( qw(requests bytes time max_bytes avg_bytes max_time avg_time) ) {
    $data{$site}{$check}=0;
  }
  foreach my $val (keys %{$data{$site}{'status'}}) {
     $data{$site}{'status'}{$val}=0;
  }
};

$share->lock(LOCK_EX);
$share->store( freeze \%data );
$share->unlock();

exit 0;
# vim:syntax=perl
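
One last note on the naming contract: the loggers and the plugin agree on field names only because both sides run the vhost/site name through Munin::Plugin's clean_fieldname before using it as a key, so the hash keys in shared memory line up with the fields the plugin prints. A quick standalone check of that mapping (not part of the tarball; the vhost name is hypothetical):

  #!/usr/bin/perl
  # Quick check of the shared naming contract: the per-vhost prefix used both as
  # the shared-memory key and as the munin field prefix. The vhost name is made up.
  use strict;
  use warnings;
  use Munin::Plugin;

  my $vhost = "www.example.com";
  my $vpm   = clean_fieldname($vhost);

  print "field prefix:  $vpm\n";
  print "example field: ${vpm}_requests.value\n";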
