Projet

Général

Profil

Révision 44e66720

ID44e66720361cc32a483c4f4c1721a2e574775f1e
Parent 580f586d
Enfant 5a92255c

Ajouté par Adrien "ze" Urban il y a environ 13 ans

cpu_linux_multi: detailed cpu usage per core (multigraph, supersampling)

Voir les différences:

plugins/system/cpu_linux_multi
1
#! /usr/bin/perl
2
########################################################################
3
#                                                                      #
4
#    WARNING    WARNING    WARNING    WARNING    WARNING    WARNING    #
5
#                                                                      #
6
#       This plugin does not work properly with multiple masters       #
7
#                                                                      #
8
########################################################################
9
#
10
#
11
# multigraph, supersampling, extended cpu information
12
#
13
# require: mpstat (to actually collect the data)
14
# require linux /proc
15
#   (sorry, quick and dirty retrieve the number of cpu from /proc/cpuinfo)
16
#
17
#
18
# ENV (default):
19
#  MUNIN_PLUGSTATE  - directory where the pid and cache files are stored
20
#
21
# ENV (user defined):
22
#  MUNIN_UPDATERATE - rate at which to update (default: 1s)
23
#  MUNIN_CACHEFLUSH_RATE - flush data every N batch (default: 1)
24
#  MUNIN_MPSTAT - binary to use as mpstat
25
#
26
#  increase cache flush rate if you have i/o performance issues
27
#  warning: increasing flushrate too much might cause partial write, and loss
28
#  of data. 0 to disable flush
29
#
30
#
31
# Parent graph: cpu usage per core/thread
32
# child graph(1): detailed cpu usage overall
33
# child graph(n): detailed cpu usage per thread
34
#
35
# Known bugs:
36
#
37
#   Multi-Master
38
#     If there are many masters, the data is only sent once. Each master will
39
#     only have part of the data.
40
#
41
#   Everlasting
42
#     The daemon is launched on first config/fetch. A touch of the pidfile is
43
#     done on every following config/fetch. The daemon should check if the
44
#     pidfile is recent (configurable) enough, and stop itself if not.
45
#
46
#   Graph Order
47
#     There is currently (2.0.6) no way to order child graphs.
48
#
49
#   RRD file
50
#     The master currently (2.0.6) generates rrd files for aggregate values, and
51
#     complains that no data is provided for them (but the graph still works
52
#     fine)
53

  
54
#%# family=auto
55
#%# capabilities=autoconf
56

  
57
use strict;
58
use warnings;
59

  
60
# Name this plugin was invoked as: $0 with everything up to and including
# the last "/" removed.
(my $plugin = $0) =~ s{.*/}{};
62

  
63
# Order in which the fields are listed in the graph_order of each detailed
# graph. The first entry is drawn as AREA, every other field as STACK.
my $fields_order = [ qw(sys usr nice idle iowait irq soft steal guest) ];
75
# Per-field metadata, listed in the order mpstat prints its columns: the
# n-th entry is paired with the n-th value following the cpu id and the
# timestamp of a cached sample. Every label is identical to its name; the
# "%s" in each info string is filled in with the name of the cpu the graph
# describes.
# NOTE(review): sysstat releases that print an additional column (e.g.
# %gnice) before %idle would shift the 'idle' value - confirm against the
# installed mpstat.
my $fields_info = [
	map {
		my ($name, $info) = @$_;
		+{ name => $name, label => $name, info => $info };
	} (
		[ usr    => "%s time spent in normal programs and daemons" ],
		[ nice   => "%s time spent in nice(1)d programs and daemons" ],
		[ sys    => "%s time spent in kernel system activity" ],
		[ iowait => "%s time spent waiting for blocking I/O operations" ],
		[ irq    => "%s time spent handling interrupts" ],
		[ soft   => "%s time spent handling software interrupts" ],
		[ steal  => "%s time spent elsewhere (stolen from us)" ],
		[ guest  => "%s time spent in a guest operating system" ],
		[ idle   => "%s time spent idling (waiting to get something to do)" ],
	)
];
115

  
116
# mpstat sampling interval (MUNIN_UPDATERATE, default: 1s).
# Only strictly positive integers are accepted; any other value is reported
# on stderr and the default is kept.
my $update_rate = 1;
if (defined $ENV{MUNIN_UPDATERATE}) {
	if ($ENV{MUNIN_UPDATERATE} =~ /^[1-9][0-9]*$/) {
		$update_rate = int($ENV{MUNIN_UPDATERATE});
	} else {
		# terminate the diagnostic so later stderr output starts on its own line
		print STDERR "Invalid update_rate: $ENV{MUNIN_UPDATERATE}\n";
	}
}
125

  
126
# Number of cached samples written between two explicit flushes of the
# cache file (MUNIN_CACHEFLUSH_RATE, default: 1). 0 disables the explicit
# flush. Values that are not plain non-negative integers are reported on
# stderr and the default is kept.
my $flush_interval = 1;
if (defined $ENV{MUNIN_CACHEFLUSH_RATE}) {
	if ($ENV{MUNIN_CACHEFLUSH_RATE} =~ /^[0-9]+$/) {
		# the setting belongs to the flush interval, not to the sampling rate
		$flush_interval = int($ENV{MUNIN_CACHEFLUSH_RATE});
	} else {
		print STDERR "Invalid flush rate: $ENV{MUNIN_CACHEFLUSH_RATE}\n";
	}
}
134

  
135
# Command used to run mpstat (MUNIN_MPSTAT, default: "mpstat" looked up in
# PATH). The value may carry arguments after the binary, so only its first
# word is treated as the program. When that word contains a "/" it has to
# name an existing file; otherwise the setting is reported on stderr and the
# default is kept.
my $mpstat = "mpstat";
if (defined $ENV{MUNIN_MPSTAT}) {
	(my $binary = $ENV{MUNIN_MPSTAT}) =~ s/ .*//;
	if ($binary eq '' or ($binary =~ m{/} and not -f $binary)) {
		print STDERR "MUNIN_MPSTAT: file not found: $ENV{MUNIN_MPSTAT}\n";
	} else {
		$mpstat = $ENV{MUNIN_MPSTAT};
	}
}
143

  
144
# Path of the pid file kept in the MUNIN_PLUGSTATE directory.
sub pidfile() {
	return $ENV{MUNIN_PLUGSTATE} . "/munin." . $plugin . ".pid";
}

# Path of the sample cache file kept in the MUNIN_PLUGSTATE directory.
sub cachefile() {
	return $ENV{MUNIN_PLUGSTATE} . "/munin." . $plugin . ".cache";
}
146

  
147
# Value printed as graph_category of the parent graph.
sub graph_section() {
	return "system:cpu";
}

# Name of the parent multigraph; sub-graphs are named "<graph_name>.cpu<id>".
sub graph_name() {
	return "cpu_extended_multi_1s";
}

# Title of the parent graph.
sub graph_title() {
	return "CPU usage";
}

# Title of the detailed graph covering all cpus.
sub graph_title_all() {
	return "Overall CPU usage";
}

# Title of the detailed graph of the cpu whose id is given as argument.
sub graph_title_n($) {
	my ($cpu) = @_;
	return "CPU#" . $cpu . " usage";
}

# Process name ($0) the collecting daemon runs under.
sub acquire_name() {
	return "<" . $plugin . "> collecting information";
}
153

  
154
# Number of cpus, computed once per process and then served from this cache.
my $cpu_count_cache = undef;

# Returns the number of lines of /proc/cpuinfo starting with "processor".
# The file is read directly instead of spawning grep, so the result is
# always a number: 0 when the file cannot be opened or lists no processor.
sub cpu_count() {
	if (not defined $cpu_count_cache) {
		my $count = 0;
		if (open my $cpuinfo, '<', '/proc/cpuinfo') {
			while (my $line = <$cpuinfo>) {
				++$count if $line =~ /^processor/;
			}
			close $cpuinfo;
		}
		$cpu_count_cache = $count;
	}
	return $cpu_count_cache;
}
163

  
164
# Tells whether the collecting daemon recorded in the pidfile is alive.
#
# Returns 1 when the pidfile holds a positive integer pid and that process
# accepts signal 0. In every other case where the pidfile exists (it is
# unreadable, empty, malformed, or names a process that cannot be
# signalled) the stale pidfile is removed and 0 is returned. Returns 0
# without touching anything when there is no pidfile.
sub is_running() {
	my $pidfile = pidfile();
	return 0 unless -f $pidfile;

	my $pid;
	if (open my $fh, '<', $pidfile) {
		$pid = <$fh>;
		close $fh;
	}
	# an empty file yields undef; anything but a plain positive integer is
	# never handed to kill
	if (defined $pid and $pid =~ /^\s*([1-9][0-9]*)\s*$/) {
		# signal 0 only probes the process, nothing is delivered
		return 1 if kill 0, $1;
	}
	unlink($pidfile);
	return 0;
}
182

  
183

  
184
# FIXME: should also trap kill sigint and sigterm
185
# FIXME: check pidfile got touched recently
186
# Body of the collecting daemon.
#
# Renames the process, records its pid in the pidfile, then runs
# "$mpstat -P ALL $update_rate" and appends one line per cpu sample to the
# cache file, formatted as "<cpu> <epoch> <value> <value> ...". <cpu> is
# "all" or a cpu number and <epoch> is the time the line was read. The
# cache is flushed every $flush_interval samples (never explicitly when
# $flush_interval is 0). Once mpstat's output ends, the pidfile and the
# cache file are removed.
#
# Dies when the pidfile, the cache file or the mpstat pipe cannot be opened.
sub acquire() {
	# provides the flush() method for perls that do not load it on demand
	require IO::Handle;

	$0 = acquire_name();

	open my $pid_fh, '>', pidfile() or die "open: @{[ pidfile() ]}: $!\n";
	print {$pid_fh} $$, "\n";
	close $pid_fh;

	open my $cache, '>>', cachefile() or die "open: @{[ cachefile() ]}: $!\n";
	open my $stats, '-|', "$mpstat -P ALL $update_rate" or
		die "open mpstat|: $!\n";

	my $flush_count = 0;
	while (my $line = <$stats>) {
		chomp $line;
		my @field = split ' ', $line;
		# the first column is mpstat's own timestamp; it is replaced by the
		# epoch below
		shift @field;
		# NOTE(review): with a 12-hour time format mpstat is expected to
		# print an extra AM/PM column after the time - confirm
		shift @field if @field and $field[0] =~ /^[AP]M$/;
		# blank lines, banners and column headers carry no cpu id
		next unless @field and $field[0] =~ /^(?:all|[0-9]+)$/;
		my $cpu = shift @field;
		print {$cache} join(" ", $cpu, time(), @field), "\n";
		if ($flush_interval and ++$flush_count == $flush_interval) {
			$cache->flush();
			$flush_count = 0;
		}
	}
	close $stats;
	close $cache;
	unlink(pidfile());
	unlink(cachefile());
}
216

  
217
# Makes sure the collecting daemon is running.
#
# When is_running() reports a live daemon, only the access and modification
# times of the pidfile are refreshed. Otherwise a child process is forked;
# the child reopens its standard streams on /dev/null, runs acquire() and
# exits, while the parent returns immediately. A failed fork is reported on
# stderr and leaves the calling process untouched.
sub run_daemon() {
	if (is_running()) {
		my $now = time;
		utime $now, $now, pidfile();
		return;
	}

	my $child = fork();
	if (not defined $child) {
		# fork() returns undef on failure; comparing that to 0 would send
		# the calling process itself down the daemon path
		print STDERR "fork: $!\n";
		return;
	}
	return if $child;

	# child: detach from the caller's standard streams
	close(STDIN);
	close(STDOUT);
	close(STDERR);
	open STDIN, "<", "/dev/null";
	open STDOUT, ">", "/dev/null";
	open STDERR, ">", "/dev/null";
	acquire();
	exit(0);
}
236

  
237

  
238
# autoconf command: prints "yes" when `which` locates the mpstat binary,
# "no" otherwise.
sub run_autoconf() {
	# $mpstat may carry arguments; only its first word names the binary
	(my $binary = $mpstat) =~ s/ .*//;
	my $located = `which "$binary"`;
	print $located ? "yes\n" : "no\n";
}
249

  
250
# Prints the configuration of one detailed sub-graph.
#
# Arguments:
#   1. suffix of the sub-graph, printed as "<graph_name>.cpu<suffix>"
#   2. cpu name substituted into each field's info text
#   3. graph title
#
# NOTE(review): update_rate is printed as the constant 1 although the
# sampling interval can be changed through MUNIN_UPDATERATE - confirm
# whether this is intended.
sub show_config($$$) {
	my ($i, $name, $title) = @_;

	my $graph_order = join(" ", "graph_order", @$fields_order);
	print
		"multigraph ", graph_name(), ".cpu$i\n",
		"graph_title $title\n",
		"graph_vlabel cpu use %\n",
		"graph_scale no\n",
		"update_rate 1\n",
		"graph_data_size custom 1d, 10s for 1w, 1m for 1t, 5m for 1y\n",
		"$graph_order\n";

	# the field listed first in the display order is the base of the stack
	my $base = $fields_order->[0];
	for my $field (@$fields_info) {
		my $id = $field->{name};
		my $style = $id eq $base ? "AREA" : "STACK";
		my $info = sprintf($field->{info}, $name);
		print
			"$id.label $field->{label}\n",
			"$id.draw $style\n",
			"$id.info $info\n",
			"$id.min 0\n",
			"$id.cdef $id\n";
	}
}
281

  
282
# config command: starts the daemon if needed, then prints the parent graph
# (one "use<N>" series per cpu, computed from that cpu's idle value)
# followed by the detailed sub-graphs: one for all cpus and one per cpu.
sub run_config() {
	run_daemon();

	my $cpus = cpu_count();
	my @cpu_ids = (0 .. $cpus - 1);
	my $graph_order = join(" ", "graph_order",
		map { "use$_=" . graph_name() . ".cpu$_.idle" } @cpu_ids);
	my $sub_order = join(" ", "order cpuall", map { "cpu$_" } @cpu_ids);
	# none of those seems to have any effect
	#domain_$sub_order
	#node_$sub_order
	#graph_$sub_order
	#service_$sub_order
	#category_$sub_order
	#group_$sub_order

	print
		"multigraph ", graph_name(), "\n",
		"graph_category ", graph_section(), "\n",
		"graph_title ", graph_title(), "\n",
		"graph_vlabel cpu use %\n",
		"graph_scale no\n",
		"graph_total All CPUs\n",
		"update_rate 1\n",
		"graph_data_size custom 1d, 10s for 1w, 1m for 1t, 5m for 1y\n",
		"$graph_order\n";

	for my $id (@cpu_ids) {
		# the first cpu is the base of the stack, the others pile up on it
		my $style = $id == 0 ? "AREA" : "STACK";
		print
			"use$id.label CPU#$id\n",
			"use$id.draw $style\n",
			"use$id.cdef 100,use$id,-,$cpus,/\n";
	}

	# detailed sub graphs - 1 for all, and 1 per cpu
	show_config("all", "all CPU", graph_title_all());
	for my $id (@cpu_ids) {
		show_config($id, "CPU$id", graph_title_n($id));
	}
}
325

  
326
# Prints one "<field>.value <epoch>:<value>" line per entry of $fields_info
# for a single cached sample. The argument is an array reference holding the
# cpu id, the epoch and then the values in $fields_info order.
sub fetch_showline($) {
	my ($sample) = @_;
	my $epoch = $sample->[1];
	for my $idx (0 .. $#$fields_info) {
		my $name = $fields_info->[$idx]{name};
		# values start right after the cpu id and the epoch
		my $value = $sample->[$idx + 2];
		print "$name.value $epoch:$value\n";
	}
}
336
# fetch command: starts the daemon if needed, then reads every cached
# sample, empties the cache file and prints the samples grouped by cpu, each
# group under its own "multigraph" header. Prints nothing when the cache
# file cannot be opened.
sub run_fetch() {
	run_daemon();
	open(my $cache, "+<", cachefile()) or return;

	# cpu id => list of samples, each sample being its space-separated fields
	my %samples_of;
	while (my $row = <$cache>) {
		chomp $row;
		my @field = split(/ /, $row);
		push @{ $samples_of{$field[0]} }, \@field;
	}
	# finished reading ? truncate it right away
	truncate $cache, 0;
	close $cache;

	for my $cpu (keys %samples_of) {
		print "multigraph ", graph_name(), ".cpu$cpu\n";
		fetch_showline($_) for @{ $samples_of{$cpu} };
	}
}
362

  
363
# Command dispatch: the first argument selects the action and defaults to
# 'fetch'. An unknown command prints the usage on stderr and exits with
# status 1; every known command exits with status 0 once its handler
# returns.
my $cmd = defined $ARGV[0] ? $ARGV[0] : 'fetch';
my %handler_for = (
	fetch    => \&run_fetch,
	config   => \&run_config,
	autoconf => \&run_autoconf,
	daemon   => \&run_daemon,
);
my $handler = $handler_for{$cmd};
if (not defined $handler) {
	print STDERR <<EOF;
$0: unrecognized command

Usage:
	$0 autoconf - check if we have everything we need
	$0 config - show plugin configuration
	$0 fetch - fetch latest data
	$0 daemon - launch daemon
EOF
	exit(1);
}
$handler->();
exit(0);

Formats disponibles : Unified diff