Projet

Général

Profil

Paste
Télécharger au format
Statistiques
| Branche: | Révision:

root / plugins / disk / linux_diskstat_ @ 64089240

Historique | Voir | Annoter | Télécharger (16,4 ko)

1 a99c217e Michael Renner
#!/usr/bin/perl -w
2
# vim: sts=4 sw=4 ts=8
3
4
# Munin markers:
5
#%# family=auto
6
#%# capabilities=autoconf suggest
7
8
# Author: Michael Renner <michael.renner@amd.co.at>
9
10
# Version: 0.0.5, 2009-05-22
11
12
13
14
=head1 NAME
15
16
linux_diskstat_ - Munin plugin to monitor various values provided
17
via C</proc/diskstats>
18
19
=head1 APPLICABLE SYSTEMS
20
21
Linux 2.6 systems with extended block device statistics enabled.
22
23
24
=head1 INTERPRETATION
25
26
Among the more self-describing or well-known values like C<throughput>
27
(Bytes per second) there are a few which might need further introduction.
28
29
30
=head2 Device Utilization
31
32
Linux provides a counter which increments in a millisecond-interval for as long
33
as there are outstanding I/O requests. If this counter is close to 1000msec
34
in a given 1 second timeframe the device is nearly 100% saturated. This plugin
35
provides values averaged over a 5 minute time frame per default, so it can't
36
catch short-lived saturations, but it'll give a nice trend for semi-uniform
37
load patterns as they're expected in most server or multi-user environments.
38
39
40
=head2 Device IO Time
41
42
The C<Device IO Time> takes the counter described under C<Device Utilization>
43
and divides it by the number of I/Os that happened in the given time frame,
44
resulting in an average time per I/O on the block-device level.
45
46
This value can give you a good comparison base amongst different controllers,
47 fba800ae Veres Lajos
storage subsystems and disks for similar workloads.
48 a99c217e Michael Renner
49
50
=head2 Syscall Wait Time
51
52
These values describe the average time it takes between an application issuing
53
a syscall resulting in a hit to a blockdevice to the syscall returning to the
54
application.
55
56
The values are bound to be higher (at least for read requests) than the time
57
it takes the device itself to fulfill the requests, since calling overhead,
58
queuing times and probably a dozen other things are included in those times.
59
60
These are the values to watch out for when a user complains that C<the disks
61
are too slow!>.
62
63
64
=head3 What causes a block device hit?
65
66
A non-exhaustive list:
67
68
=over
69
70
=item * Reads from files when the given range is not in the page cache or the O_DIRECT
71
flag is set.
72
73
=item * Writes to files if O_DIRECT or O_SYNC is set or sys.vm.dirty_(background_)ratio
74
is exceeded.
75
76
=item * Filesystem metadata operations (stat(2), getdents(2), file creation,
77
modification of any of the values returned by stat(2), etc.)
78
79
=item * The pdflush daemon writing out dirtied pages
80
81
=item * (f)sync
82
83
=item * Swapping
84
85
=item * raw device I/O (mkfs, dd, etc.)
86
87
=back
88
89
=head1 ACKNOWLEDGEMENTS
90
91
The core logic of this script is based on the B<iostat> tool of the B<sysstat>
92
package written and maintained by Sebastien Godard.
93
94
=head1 SEE ALSO
95
96
See C<Documentation/iostats.txt> in your Linux source tree for further information
97
about the C<numbers> involved in this module.
98
99
L<http://www.westnet.com/~gsmith/content/linux-pdflush.htm> has a nice writeup
100
about the pdflush daemon.
101
102
=head1 AUTHOR
103
104
Michael Renner <michael.renner@amd.co.at>
105
106
=head1 LICENSE
107
108
GPLv2
109
110
111
=cut
112
113
114
use strict;
115
116
117
use File::Basename;
118
use Carp;
119
use POSIX;
120
121
# save_state()/restore_state() normally come from Munin::Plugin. When that
# module cannot be loaded we install our own minimal replacements instead.
# Don't try this at home
eval {
    require Munin::Plugin;
    Munin::Plugin->import;
};
fake_munin_plugin() if $@;

# Mode and device are encoded in the invocation name, so refuse to run under
# a name that does not carry the expected prefix.
die qq(Please ensure that the name of the script and it's symlinks starts with "linux_diskstat_"\n)
  unless basename($0) =~ /^linux_diskstat_/;
135
136
137
############
138
# autoconf #
139
############
140
141
if ( defined $ARGV[0] && $ARGV[0] eq 'autoconf' ) {

    # Any croak raised while collecting the statistics is trapped by the
    # eval and simply counts as "plugin not usable here".
    my %stats = eval { parse_diskstats() };

    # Usable only if nothing died and at least one device was found.
    my $usable = ( !$@ && keys %stats );

    print $usable ? "yes\n" : "no\n";
    exit 0;
}
156
157
158
###########
159
# suggest #
160
###########
161
162
if ( defined $ARGV[0] && $ARGV[0] eq 'suggest' ) {

    # Prints one "<mode>_<device>" line for every graph that makes sense on
    # this host, then exits.
    my %diskstats = parse_diskstats();

    my %suggested_devices;

  DEVICE:
    for my $devname ( sort keys %diskstats ) {

        # Skip devices without traffic
        next
          if ( $diskstats{$devname}->{'rd_ios'} == 0
            && $diskstats{$devname}->{'wr_ios'} == 0 );

        for my $existing_device ( @{ $suggested_devices{'iops'} } ) {

            # Filter out partitions of devices which are already listed,
            # e.g. sda1 -> sda, c0d0p1 -> c0d0.
            #
            # A partition is named "<disk><number>", or "<disk>p<number>"
            # when the disk name itself ends in a digit. Matching exactly
            # that (anchored, with the disk name quoted) keeps unrelated
            # whole devices such as sdaa (vs. sda), md10 (vs. md1) or
            # dm-10 (vs. dm-1) from being dropped by accident.
            my $separator = $existing_device =~ m/\d\z/ ? 'p' : '';

            next DEVICE
              if ( $devname =~ m/\A\Q$existing_device\E$separator\d+\z/ );
        }

        push @{ $suggested_devices{'iops'} },       $devname;
        push @{ $suggested_devices{'throughput'} }, $devname;

        # Only suggest latency graphs if the device supports it
        if (   $diskstats{$devname}->{'rd_ticks'} > 0
            || $diskstats{$devname}->{'wr_ticks'} > 0 )
        {
            push @{ $suggested_devices{'latency'} }, $devname;
        }
    }

    # Sorted so that the output order is stable between runs.
    for my $mode ( sort keys %suggested_devices ) {
        for my $device ( sort @{ $suggested_devices{$mode} } ) {

            # '/' cannot be part of a file name, so encode it for the
            # symlink name.
            my $printdev = translate_device_name($device, 'TO_FS');
            print "${mode}_$printdev\n";
        }
    }

    exit 0;
}
204
205
206
# Reading the scripts invocation name and setting some parameters,
207
# needed from here on
208
209
my $basename = basename($0);

# The invocation name has the form linux_diskstat_<mode>_<device>.
# The mode is captured non-greedily so that it ends at the first underscore:
# none of the supported modes (latency, throughput, iops) contains one, but
# device names may (e.g. md_d0). A greedy match would swallow part of such a
# device name into the mode.
my ( $mode, $device ) = $basename =~ m/linux_diskstat_(\w+?)_([-+\w]+)$/;

if ( not defined $device ) {
    croak "Didn't get a device name. Aborting\n";
}

# Turn the file-name-safe spelling back into the kernel's device name.
$device = translate_device_name($device, 'FROM_FS');
217
218
##########
219
# config #
220
##########
221
222
if ( defined $ARGV[0] && $ARGV[0] eq 'config' ) {

    # Device-mapper nodes (dm-N) are shown under a friendlier name in the
    # graph title; every other device keeps its own name.
    my $pretty_device =
        $device =~ /^dm-\d+$/
      ? translate_devicemapper_name($device)
      : $device;

    # Munin graph configuration, keyed by the mode taken from the script name.
    my %config_for;

    $config_for{'latency'} = <<EOF;
graph_title Disk latency for /dev/$pretty_device
graph_args --base 1000
graph_category disk

util.label Device utilization (percent)
util.type GAUGE
util.info Utilization of the device. If the time spent for I/O is close to 1000msec for a given second, the device is nearly 100% saturated.
util.min 0
svctm.label Average device IO time (ms)
svctm.type GAUGE
svctm.info Average time an I/O takes on the block device
svctm.min 0
avgwait.label Average IO Wait time (ms)
avgwait.type GAUGE
avgwait.info Average wait time for an I/O from request start to finish (includes queue times et al)
avgwait.min 0
avgrdwait.label Average Read IO Wait time (ms)
avgrdwait.type GAUGE
avgrdwait.info Average wait time for a read I/O from request start to finish (includes queue times et al)
avgrdwait.min 0
avgwrwait.label Average Write IO Wait time (ms)
avgwrwait.type GAUGE
avgwrwait.info Average wait time for a write I/O from request start to finish (includes queue times et al)
avgwrwait.min 0

EOF

    $config_for{'throughput'} = <<EOF;
graph_title Disk throughput for /dev/$pretty_device
graph_args --base 1024
graph_vlabel Bytes/second
graph_category disk

rdbytes.label Read Bytes
rdbytes.type GAUGE
rdbytes.min 0
wrbytes.label Write Bytes
wrbytes.type GAUGE
wrbytes.min 0

EOF

    $config_for{'iops'} = <<EOF;
graph_title Disk IOs for /dev/$pretty_device
graph_args --base 1000
graph_vlabel Units/second
graph_category disk

rdio.label Read IO/sec
rdio.type GAUGE
rdio.min 0
wrio.label Write IO/sec
wrio.type GAUGE
wrio.min 0
avgrqsz.label Average Request Size (KiB)
avgrqsz.type GAUGE
avgrqsz.min 0
avgrdrqsz.label Average Read Request Size (KiB)
avgrdrqsz.type GAUGE
avgrdrqsz.min 0
avgwrrqsz.label Average Write Request Size (KiB)
avgwrrqsz.type GAUGE
avgwrrqsz.min 0

EOF

    # Anything that is not one of the three known modes is a usage error.
    croak "Unknown mode $mode\n" unless exists $config_for{$mode};

    print $config_for{$mode};
    exit 0;
}
310
311
312
########
313
# MAIN #
314
########
315
316
317
my %cur_diskstat = fetch_device_counters($device);

# Without counters for the requested device there is nothing sensible to
# store or to report. Bail out before the saved state gets overwritten with
# garbage.
croak "No statistics found for device '$device'. Aborting\n"
  if ( not defined $cur_diskstat{'rd_ios'} );

# State layout: timestamp of the sample followed by the flattened counters.
my ( $prev_time, %prev_diskstat ) = restore_state();

save_state( time(), %cur_diskstat );

# Probably the first run for the given device, we need state to do our job,
# so let's wait for the next run.
exit if ( not defined $prev_time or not %prev_diskstat );

calculate_and_print_values( $prev_time, \%prev_diskstat, \%cur_diskstat );
329
330
331
332
########
333
# SUBS #
334
########
335
336
# Derives per-second rates and averages from two counter samples and prints
# the Munin "<field>.value" lines for the mode selected by the file-level
# $mode variable.
#
# Parameters:
#   $prev_time  - epoch seconds at which the previous sample was stored
#   $prev_stats - hash ref with the previous counters
#   $cur_stats  - hash ref with the current counters
#
# Both hash refs are read for the keys rd_ios, wr_ios, rd_ticks, wr_ticks,
# rd_sectors, wr_sectors and tot_ticks.
#
# Prints nothing when no time has passed since the previous sample.
# Croaks on an unknown mode.
sub calculate_and_print_values {
    my ( $prev_time, $prev_stats, $cur_stats ) = @_;

    # The kernel reports sector counts in units of 512 bytes.
    my $bytes_per_sector = 512;

    my $interval = time() - $prev_time;

    # Two runs within the same second (or a clock that stepped backwards)
    # would make every rate below divide by zero or come out negative.
    # Report nothing for this run instead.
    return if ( $interval <= 0 );

    my $read_ios  = $cur_stats->{'rd_ios'} - $prev_stats->{'rd_ios'};
    my $write_ios = $cur_stats->{'wr_ios'} - $prev_stats->{'wr_ios'};

    my $rd_ticks = $cur_stats->{'rd_ticks'} - $prev_stats->{'rd_ticks'};
    my $wr_ticks = $cur_stats->{'wr_ticks'} - $prev_stats->{'wr_ticks'};

    my $rd_sectors = $cur_stats->{'rd_sectors'} - $prev_stats->{'rd_sectors'};
    my $wr_sectors = $cur_stats->{'wr_sectors'} - $prev_stats->{'wr_sectors'};

    my $tot_ticks = $cur_stats->{'tot_ticks'} - $prev_stats->{'tot_ticks'};

    my $read_io_per_sec  = $read_ios / $interval;
    my $write_io_per_sec = $write_ios / $interval;

    my $read_bytes_per_sec  = $rd_sectors / $interval * $bytes_per_sector;
    my $write_bytes_per_sec = $wr_sectors / $interval * $bytes_per_sector;

    my $total_ios         = $read_ios + $write_ios;
    my $total_ios_per_sec = $total_ios / $interval;

    # Utilization - or "how busy is the device"?
    # If the time spent for I/O was close to 1000msec for
    # a given second, the device is nearly 100% saturated.
    my $utilization = $tot_ticks / $interval;

    # Average time an I/O takes on the block device
    my $servicetime =
      $total_ios_per_sec ? $utilization / $total_ios_per_sec : 0;

    # Average wait time for an I/O from start to finish
    # (includes queue times et al)
    my $average_wait = $total_ios ? ( $rd_ticks + $wr_ticks ) / $total_ios : 0;
    my $average_rd_wait = $read_ios  ? $rd_ticks / $read_ios  : 0;
    my $average_wr_wait = $write_ios ? $wr_ticks / $write_ios : 0;

    # Request sizes; every average falls back to 0 when no I/O happened.
    my $average_rq_size_in_kb =
      $total_ios
      ? ( $rd_sectors + $wr_sectors ) * $bytes_per_sector / 1024 / $total_ios
      : 0;
    my $average_rd_rq_size_in_kb =
      $read_ios ? $rd_sectors * $bytes_per_sector / 1024 / $read_ios : 0;
    my $average_wr_rq_size_in_kb =
      $write_ios ? $wr_sectors * $bytes_per_sector / 1024 / $write_ios : 0;

    # msec of I/O per second -> percent (1000 msec/sec == 100%)
    my $util_print = $utilization / 10;

    if ( $mode eq 'latency' ) {
        print <<EOF;

util.value $util_print
svctm.value $servicetime
avgwait.value $average_wait
avgrdwait.value $average_rd_wait
avgwrwait.value $average_wr_wait

EOF
    }
    elsif ( $mode eq 'throughput' ) {

        print <<EOF;

rdbytes.value $read_bytes_per_sec
wrbytes.value $write_bytes_per_sec

EOF
    }
    elsif ( $mode eq 'iops' ) {

        print <<EOF;

rdio.value $read_io_per_sec
wrio.value $write_io_per_sec
avgrqsz.value $average_rq_size_in_kb
avgrdrqsz.value $average_rd_rq_size_in_kb
avgwrrqsz.value $average_wr_rq_size_in_kb

EOF

    }
    else {
        croak "Unknown mode $mode\n";
    }

}
430
431
# Reads /proc/diskstats.
#
# Returns a list of array refs, one per usable line, each holding exactly
# 14 values: major, minor, device name and the 11 classic I/O counters.
#
# Croaks if the file cannot be opened or closed.
sub read_diskstats {

    open my $stat_fh, '<', '/proc/diskstats'
      or croak "Failed to open '/proc/diskstats': $!\n";

    my @lines;

    while ( my $line = <$stat_fh> ) {

        # Strip trailing newline and leading whitespace
        chomp $line;
        $line =~ s/^\s+//;

        my @elems = split /\s+/, $line;

        # We explicitly don't support old-style diskstats
        # There are situations where only _some_ lines (e.g.
        # partitions on older 2.6 kernels) have fewer stats
        # numbers, therefore we'll skip them silently
        next if ( @elems < 14 );

        # Newer kernels append further counters (discard, flush) after the
        # classic 14 columns. Requiring exactly 14 would drop every line
        # there, so accept longer lines and keep only the columns we use.
        push @lines, [ @elems[ 0 .. 13 ] ];
    }

    close $stat_fh or croak "Failed to close '/proc/diskstats': $!";
    return @lines;
}
459
460
# Reads the per-device statistics from /sys/block/<device>/stat.
#
# Parameters:
#   $want_device - optional device name; when given only this device is
#                  read, otherwise every /sys/block/*/stat file is.
#
# Returns a list of array refs laid out like a /proc/diskstats line:
# (-1, -1, device name, counters...). Major and minor are not available
# here and are faked as -1.
#
# Croaks if a stat file cannot be opened or closed, is empty, or holds
# fewer than the 11 classic counters.
sub read_sysfs {

    my ($want_device) = @_;

    my @devices;
    my @lines;

    if ( defined $want_device ) {

        # sysfs uses '!' as replacement for '/', e.g. cciss!c0d0
        $want_device =~ tr#/#!#;
        @devices = $want_device;
    }
    else {
        @devices = glob "/sys/block/*/stat";
        @devices = map { m!/sys/block/([^/]+)/stat! } @devices;
    }

    for my $cur_device (@devices) {
        my $stats_file = "/sys/block/$cur_device/stat";

        # Three-argument open: the device name comes from the script name
        # and must never be interpreted as part of the open mode.
        open my $stat_fh, '<', $stats_file
          or croak "Failed to open '$stats_file': $!\n";

        my $line = <$stat_fh>;

        croak "'$stats_file' is empty. Aborting" unless defined $line;

        # Trimming whitespace
        $line =~ s/^\s+//;
        chomp $line;

        my @elems = split /\s+/, $line;

        # Kernels have grown this file from 11 to 15 and later 17 values.
        # Only the first 11 are used, so any longer line is fine.
        croak "'$stats_file' contains fewer than 11 values. Aborting"
          if ( @elems < 11 );

        # Translate the devicename back before storing the information
        $cur_device =~ tr#!#/#;

        # Faking missing diskstats values
        unshift @elems, ( -1, -1, $cur_device );

        push @lines, \@elems;

        close $stat_fh or croak "Failed to close '$stats_file': $!\n";
    }

    return @lines;
}
509
510
511
# Collects the raw statistics (from sysfs when available, otherwise from
# /proc/diskstats) and turns them into a hash keyed by device name.
#
# Parameters:
#   $want_device - optional device name, passed on to read_sysfs()
#
# Returns a hash (as a flat list): device name => hash ref with the keys
#   major minor devname rd_ios rd_merges rd_sectors rd_ticks
#   wr_ios wr_merges wr_sectors wr_ticks ios_in_prog tot_ticks rq_ticks
sub parse_diskstats {

    my ($want_device) = @_;

    # glob() must be called in list context here: in scalar context it is
    # an iterator that hands out one name per call and eventually undef,
    # which would make a later call believe that sysfs is missing.
    my @sysfs_stat_files = glob "/sys/block/*/stat";

    my @stats =
        @sysfs_stat_files
      ? read_sysfs($want_device)
      : read_diskstats();

    my %diskstats;

    for my $entry (@stats) {

        my %devstat;

        # Hash-Slicing for fun and profit
        # (values beyond the 14 named columns are silently ignored)
        @devstat{
            qw(major minor devname
              rd_ios rd_merges rd_sectors rd_ticks
              wr_ios wr_merges wr_sectors wr_ticks
              ios_in_prog tot_ticks rq_ticks)
          }
          = @{$entry};

        $diskstats{ $devstat{'devname'} } = \%devstat;
    }

    return %diskstats;
}
545
546
# Returns the counters of a single device.
#
# Parameters:
#   $want_device - name of the device to look up
#
# Returns the device's statistics as a flat key/value list (suitable for
# assignment to a hash), or an empty list when the device is unknown.
sub fetch_device_counters {

    my ($want_device) = @_;

    # Bare return: callers assign the result to a hash, and "return undef"
    # would hand them a one-element list instead of an empty one.
    return unless defined $want_device;

    my %diskstats = parse_diskstats($want_device);

    return unless exists $diskstats{$want_device};

    return %{ $diskstats{$want_device} };
}
560
561
562
# We use '+' (and formerly '-') as placeholder for '/' in device-names
563
# used as calling name for the script.
564
# Converts a device name between its kernel spelling and the spelling used
# in the script's file name.
#
# Parameters:
#   $device - the name to convert
#   $mode   - 'TO_FS'   : replace every '/' by '+'
#             'FROM_FS' : replace every '+' and '-' by '/', except for
#                         names containing dm-<number>, which are returned
#                         unchanged
#
# Returns the converted name. Croaks on any other mode.
sub translate_device_name {

    my ($device, $mode) = @_;

    if ( $mode eq 'TO_FS' ) {
        ( my $fs_name = $device ) =~ tr{/}{+};
        return $fs_name;
    }

    croak "translate_device_name: Unknown mode\n" if ( $mode ne 'FROM_FS' );

    # Hackaround to mitigate issues with unwisely chosen former separator:
    # the dash in device-mapper names is part of the name itself.
    return $device if ( $device =~ m/dm-\d+/ );

    ( my $kernel_name = $device ) =~ tr{-+}{//};
    return $kernel_name;
}
587
588
589
# Installs minimal save_state()/restore_state() replacements for hosts where
# Munin::Plugin cannot be loaded. The replacements persist the state with
# Storable in /tmp/munin-state-<script name>.
#
# Croaks if the replacement code cannot be set up (e.g. Storable missing).
#
# NOTE(review): the state file has a predictable name in /tmp and is read
# back with Storable's retrieve(). On a multi-user host another user could
# plant that file first - consider moving it to a private directory.
sub fake_munin_plugin {
    my $eval_code = <<'EOF';

use Storable;
my $storable_filename = basename($0);
$storable_filename = "/tmp/munin-state-$storable_filename";

sub save_state {
    my @state = @_;

    if ( not -e $storable_filename or -f $storable_filename ) {
        store \@state, $storable_filename or croak "Failed to persist state to '$storable_filename': $!\n";
    }
    else {
        croak "$storable_filename is probably not a regular file. Please delete it.\n";
    }
}

sub restore_state {

    if (-f $storable_filename) {
        my $state = retrieve($storable_filename);
        return @{$state};
    }
    else {
        return undef;
    }
}
EOF

    eval($eval_code);

    # A failed string eval is otherwise silent and would only surface much
    # later as an "Undefined subroutine" error.
    croak "Failed to set up fallback state handling: $@" if $@;
}
621
622
# Finds a friendlier name for a device-mapper node.
#
# Parameters:
#   $device - kernel name of the form dm-<minor>
#
# Returns the LVM style "vg/lv" name when translate_lvm_name() finds one,
# otherwise the matching /dev/mapper/... path, otherwise $device unchanged.
#
# Croaks if $device is not of the form dm-<number> or if the device-mapper
# major number cannot be determined.
sub translate_devicemapper_name {
    my ($device) = @_;

    my ($want_minor) = $device =~ m/^dm-(\d+)$/;

    croak "Failed to extract devicemapper id" unless defined ($want_minor);

    my $dm_major = find_devicemapper_major();
    croak "Failed to get device-mapper major number\n" unless defined $dm_major;

    for my $entry (glob '/dev/mapper/*') {

        my $rdev = (stat($entry))[6];

        # stat() fails for e.g. a dangling symlink; nothing to compare then.
        next unless defined $rdev;

        # Linux device numbers: the major sits in bits 8-19. The minor is
        # split into its low byte (bits 0-7) and its upper bits (from bit
        # 20 up). Plain "/ 256" and "% 256" only work for minors below 256.
        my $major = ( $rdev >> 8 ) & 0xfff;
        my $minor = ( $rdev & 0xff ) | ( ( $rdev >> 12 ) & 0xfff00 );

        if ($major == $dm_major && $minor == $want_minor) {

            my $pretty_name = translate_lvm_name($entry);

            return defined $pretty_name ? $pretty_name : $entry;

        }
    }
    # Return original string if the device can't be found.
    return $device;
}
649
650
651
652
# Tries to read a /dev/mapper entry as an LVM volume.
#
# Parameters:
#   $entry - path of a /dev/mapper node
#
# Returns "vg/lv" when the node's base name splits at a single dash and
# /dev/vg/lv exists; undef in every other case.
sub translate_lvm_name {

    my ($entry) = @_;

    # A dash that is neither preceded nor followed by another dash. Doubled
    # dashes are collapsed into a single one further down.
    my $single_dash = qr/(?<!-)-(?!-)/;

    my $mapper_name = basename($entry);

    # Without a single dash this cannot be a vg-lv name.
    return undef if ( $mapper_name !~ $single_dash );

    # split device name into vg and lv parts
    my ( $vg, $lv ) = split $single_dash, $mapper_name, 2;
    return undef if ( !defined($vg) || !defined($lv) );

    # remove extraneous dashes from vg and lv names
    for my $part ( $vg, $lv ) {
        $part =~ s/--/-/g;
    }

    my $lvm_path = "$vg/$lv";

    # Sanity check - does the constructed device name exist?
    return stat("/dev/$lvm_path") ? "$lvm_path" : undef;
}
679
680
# Looks up the major number registered for "device-mapper" in /proc/devices.
#
# Returns the major number, or undef when there is no such entry.
# Croaks if /proc/devices cannot be opened.
sub find_devicemapper_major {

    open my $devices_fh, '<', '/proc/devices'
      or croak "Failed to open '/proc/devices': $!";

    my $dm_major;

    while ( my $line = <$devices_fh> ) {
        chomp $line;

        # split ' ' skips leading whitespace. The numbers are right-aligned,
        # so with /\s+/ a major below 100 would end up in $name, not $major.
        my ($major, $name) = split ' ', $line, 2;

        # Section headers and blank lines have no second field we care about.
        next unless defined $name;

        if ($name eq 'device-mapper') {
            $dm_major = $major;
            last;
        }
    }
    close $devices_fh;

    return $dm_major;
}