Projet

Général

Profil

Paste
Télécharger au format
Statistiques
| Branche: | Révision:

root / plugins / disk / linux_diskstat_ @ 8589c6df

Historique | Voir | Annoter | Télécharger (16,4 ko)

1 a99c217e Michael Renner
#!/usr/bin/perl -w
2
# vim: sts=4 sw=4 ts=8
3
4
# Munin markers:
5
#%# family=auto
6
#%# capabilities=autoconf suggest
7
8
# Author: Michael Renner <michael.renner@amd.co.at>
9
10
# Version: 0.0.5, 2009-05-22
11
12
13
14
=head1 NAME
15
16
linux_diskstat_ - Munin plugin to monitor various values provided
17
via C</proc/diskstats>
18
19
=head1 APPLICABLE SYSTEMS
20
21
Linux 2.6 systems with extended block device statistics enabled.
22
23
24
=head1 INTERPRETATION
25
26
Among the more self-describing or well-known values like C<throughput>
27
(Bytes per second) there are a few which might need further introduction.
28
29
30
=head2 Device Utilization
31
32
Linux provides a counter which increments in a millisecond-interval for as long
33
as there are outstanding I/O requests. If this counter is close to 1000msec
34
in a given 1 second timeframe the device is nearly 100% saturated. This plugin
35
provides values averaged over a 5 minute time frame per default, so it can't
36
catch short-lived saturations, but it'll give a nice trend for semi-uniform
37
load patterns as they're expected in most server or multi-user environments.
38
39
40
=head2 Device IO Time
41
42
The C<Device IO Time> takes the counter described under C<Device Utilization>
43
and divides it by the number of I/Os that happened in the given time frame,
44
resulting in an average time per I/O on the block-device level.
45
46
This value can give you a good comparison base amongst different controllers,
47 fba800ae Veres Lajos
storage subsystems and disks for similar workloads.
48 a99c217e Michael Renner
49
50
=head2 Syscall Wait Time
51
52
These values describe the average time it takes between an application issuing
53
a syscall resulting in a hit to a blockdevice to the syscall returning to the
54
application.
55
56
The values are bound to be higher (at least for read requests) than the time
57
it takes the device itself to fulfill the requests, since calling overhead,
58
queuing times and probably a dozen other things are included in those times.
59
60
These are the values to watch out for when a user complains that C<the disks
61
are too slow!>.
62
63
64
=head3 What causes a block device hit?
65
66
A non-exhaustive list:
67
68
=over
69
70
=item * Reads from files when the given range is not in the page cache or the O_DIRECT
71
flag is set.
72
73
=item * Writes to files if O_DIRECT or O_SYNC is set or sys.vm.dirty_(background_)ratio
74
is exceeded.
75
76
=item * Filesystem metadata operations (stat(2), getdents(2), file creation,
77
modification of any of the values returned by stat(2), etc.)
78
79
=item * The pdflush daemon writing out dirtied pages
80
81
=item * (f)sync
82
83
=item * Swapping
84
85
=item * raw device I/O (mkfs, dd, etc.)
86
87
=back
88
89
=head1 ACKNOWLEDGEMENTS
90
91
The core logic of this script is based on the B<iostat> tool of the B<sysstat>
92
package written and maintained by Sebastien Godard.
93
94
=head1 SEE ALSO
95
96
See C<Documentation/iostats.txt> in your Linux source tree for further information
97
about the C<numbers> involved in this module.
98
99
L<http://www.westnet.com/~gsmith/content/linux-pdflush.htm> has a nice writeup
100
about the pdflush daemon.
101
102
=head1 AUTHOR
103
104
Michael Renner <michael.renner@amd.co.at>
105
106
=head1 LICENSE
107
108
GPLv2
109
110
111
=cut
112
113
114
use strict;
115
116
117
use File::Basename;
118
use Carp;
119
use POSIX;
120
121
# State handling (save_state/restore_state) normally comes from
# Munin::Plugin. When that module cannot be loaded, home-grown stand-ins
# are installed instead. Don't try this at home.
eval {
    require Munin::Plugin;
    Munin::Plugin->import;
};
fake_munin_plugin() if $@;

# Mode and device are taken from the invocation name later on, so refuse
# to run under a name that does not carry the expected prefix.
die qq(Please ensure that the name of the script and it's symlinks starts with "linux_diskstat_"\n)
  unless basename($0) =~ /^linux_diskstat_/;
135
136
137
############
138
# autoconf #
139
############
140
141
# Munin "autoconf" probe: tell munin-node-configure whether this plugin can
# work on this host. The answer is "yes" or "no (reason)" on stdout; the
# exit status is 0 either way, because a non-zero status signals a broken
# plugin rather than a negative answer.
if ( defined $ARGV[0] && $ARGV[0] eq 'autoconf' ) {

    # Capture any croaks on the way
    my %stats = eval { parse_diskstats() };
    my $error = $@;

    if ( !$error && keys %stats ) {
        print "yes\n";
    }
    elsif ($error) {

        # The answer has to fit on one line, so fold the (possibly
        # multi-line) croak message into a single line.
        $error =~ s/\s+/ /g;
        $error =~ s/\s+\z//;
        print "no ($error)\n";
    }
    else {
        print "no (no block device statistics found)\n";
    }

    exit 0;
}
157
158
159
###########
160
# suggest #
161
###########
162
163
# Munin "suggest" probe: print one "<mode>_<device>" line for every graph
# that makes sense on this host. Devices that never saw any I/O and
# partitions of an already suggested device are left out.
if ( defined $ARGV[0] && $ARGV[0] eq 'suggest' ) {

    my %diskstats = parse_diskstats();

    my %suggested_devices;

  DEVICE:
    for my $devname ( sort keys %diskstats ) {

        # Skip devices without traffic
        next DEVICE
          if ( $diskstats{$devname}->{'rd_ios'} == 0
            && $diskstats{$devname}->{'wr_ios'} == 0 );

        for my $existing_device ( @{ $suggested_devices{'iops'} } ) {

            # Filter out partitions of devices which are already suggested,
            # e.g. sda1 -> sda, c0d0p1 -> c0d0. A partition is named after
            # its parent plus a number; when the parent name itself ends in
            # a digit a 'p' is put in between. The match is anchored and the
            # parent name quoted, so unrelated devices that merely share a
            # prefix (dm-1/dm-10, md1/md10, sda/sdaa) are kept.
            my $separator = $existing_device =~ m/\d\z/ ? 'p' : '';

            next DEVICE
              if ( $devname =~ m/\A\Q$existing_device\E$separator\d+\z/ );
        }

        push @{ $suggested_devices{'iops'} },       $devname;
        push @{ $suggested_devices{'throughput'} }, $devname;

        # Only suggest latency graphs if the device supports it
        if (   $diskstats{$devname}->{'rd_ticks'} > 0
            || $diskstats{$devname}->{'wr_ticks'} > 0 )
        {
            push @{ $suggested_devices{'latency'} }, $devname;
        }
    }

    for my $mode ( keys %suggested_devices ) {
        for my $device ( sort @{ $suggested_devices{$mode} } ) {

            # '/' cannot be part of a file name, so print the escaped form
            my $printdev = translate_device_name( $device, 'TO_FS' );
            print "${mode}_$printdev\n";
        }
    }

    exit 0;
}
205
206
207
# From here on the graph mode and the device are needed; both are encoded
# in the name the script was invoked under: linux_diskstat_<mode>_<device>

my $basename = basename($0);
my ( $mode, $device ) = $basename =~ m/linux_diskstat_(\w+)_([-+\w]+)$/;

croak "Didn't get a device name. Aborting\n" unless defined $device;

# Turn the file-name-safe spelling back into the real device name
$device = translate_device_name( $device, 'FROM_FS' );
218
219
##########
220
# config #
221
##########
222
223
# Munin "config" request: print the graph definition that belongs to the
# mode encoded in the script name, then exit.
if ( defined $ARGV[0] && $ARGV[0] eq 'config' ) {

    # Device-mapper nodes (dm-N) are shown under their translated name
    my $pretty_device =
      ( $device =~ /^dm-\d+$/ )
      ? translate_devicemapper_name($device)
      : $device;

    # One graph definition per supported mode
    my %config_for = (
        'latency' => <<LATENCY_CONFIG,
graph_title Disk latency for /dev/$pretty_device
graph_args --base 1000
graph_category disk

util.label Device utilization (percent)
util.type GAUGE
util.info Utilization of the device. If the time spent for I/O is close to 1000msec for a given second, the device is nearly 100% saturated.
util.min 0
svctm.label Average device IO time (ms)
svctm.type GAUGE
svctm.info Average time an I/O takes on the block device
svctm.min 0
avgwait.label Average IO Wait time (ms)
avgwait.type GAUGE
avgwait.info Average wait time for an I/O from request start to finish (includes queue times et al)
avgwait.min 0
avgrdwait.label Average Read IO Wait time (ms)
avgrdwait.type GAUGE
avgrdwait.info Average wait time for a read I/O from request start to finish (includes queue times et al)
avgrdwait.min 0
avgwrwait.label Average Write IO Wait time (ms)
avgwrwait.type GAUGE
avgwrwait.info Average wait time for a write I/O from request start to finish (includes queue times et al)
avgwrwait.min 0

LATENCY_CONFIG
        'throughput' => <<THROUGHPUT_CONFIG,
graph_title Disk throughput for /dev/$pretty_device
graph_args --base 1024
graph_vlabel Bytes/second
graph_category disk

rdbytes.label Read Bytes
rdbytes.type GAUGE
rdbytes.min 0
wrbytes.label Write Bytes
wrbytes.type GAUGE
wrbytes.min 0

THROUGHPUT_CONFIG
        'iops' => <<IOPS_CONFIG,
graph_title Disk IOs for /dev/$pretty_device
graph_args --base 1000
graph_vlabel Units/second
graph_category disk

rdio.label Read IO/sec
rdio.type GAUGE
rdio.min 0
wrio.label Write IO/sec
wrio.type GAUGE
wrio.min 0
avgrqsz.label Average Request Size (KiB)
avgrqsz.type GAUGE
avgrqsz.min 0
avgrdrqsz.label Average Read Request Size (KiB)
avgrdrqsz.type GAUGE
avgrdrqsz.min 0
avgwrrqsz.label Average Write Request Size (KiB)
avgwrrqsz.type GAUGE
avgwrrqsz.min 0

IOPS_CONFIG
    );

    croak "Unknown mode $mode\n" unless exists $config_for{$mode};

    print $config_for{$mode};

    exit 0;
}
311
312
313
########
314
# MAIN #
315
########
316
317
318
# Fetch the current counters, swap them with the state saved by the previous
# run and print the values derived from the difference between the two.

my %cur_diskstat = fetch_device_counters($device);

# A device without statistics yields no usable counters. Stop here instead
# of persisting a junk state and graphing values computed from it.
croak "No statistics found for device '$device'. Aborting\n"
  if ( not exists $cur_diskstat{'rd_ios'} );

my ( $prev_time, %prev_diskstat ) = restore_state();

save_state( time(), %cur_diskstat );

# Probably the first run for the given device, we need state to do our job,
# so let's wait for the next run.
exit if ( not defined $prev_time or not %prev_diskstat );

calculate_and_print_values( $prev_time, \%prev_diskstat, \%cur_diskstat );
330
331
332
333
########
334
# SUBS #
335
########
336
337
# Derive the graph values from two counter snapshots and print them in
# Munin's "<field>.value <number>" format for the active mode.
#
# Parameters:
#   $prev_time  - epoch seconds at which the previous snapshot was taken
#   $prev_stats - hash ref with the counters of the previous snapshot
#   $cur_stats  - hash ref with the counters read during this run
#
# Reads the file-level $mode to decide which set of values is printed and
# croaks on a mode it does not know. Prints nothing when no positive amount
# of time lies between the previous snapshot and now.
sub calculate_and_print_values {
    my ( $prev_time, $prev_stats, $cur_stats ) = @_;

    # Sector counts are converted to bytes with a fixed 512 bytes per sector
    my $bytes_per_sector = 512;

    my $interval = time() - $prev_time;

    # Two runs within the same second or a clock that stepped backwards
    # leave no usable interval: every rate below would either divide by
    # zero or come out negative. Skip this run's values instead.
    return if $interval <= 0;

    my $read_ios  = $cur_stats->{'rd_ios'} - $prev_stats->{'rd_ios'};
    my $write_ios = $cur_stats->{'wr_ios'} - $prev_stats->{'wr_ios'};

    my $rd_ticks = $cur_stats->{'rd_ticks'} - $prev_stats->{'rd_ticks'};
    my $wr_ticks = $cur_stats->{'wr_ticks'} - $prev_stats->{'wr_ticks'};

    my $rd_sectors = $cur_stats->{'rd_sectors'} - $prev_stats->{'rd_sectors'};
    my $wr_sectors = $cur_stats->{'wr_sectors'} - $prev_stats->{'wr_sectors'};

    my $tot_ticks = $cur_stats->{'tot_ticks'} - $prev_stats->{'tot_ticks'};

    my $read_io_per_sec  = $read_ios / $interval;
    my $write_io_per_sec = $write_ios / $interval;

    my $read_bytes_per_sec  = $rd_sectors / $interval * $bytes_per_sector;
    my $write_bytes_per_sec = $wr_sectors / $interval * $bytes_per_sector;

    my $total_ios         = $read_ios + $write_ios;
    my $total_ios_per_sec = $total_ios / $interval;

    # Utilization - or "how busy is the device"?
    # If the time spent for I/O was close to 1000msec for
    # a given second, the device is nearly 100% saturated.
    my $utilization = $tot_ticks / $interval;

    # Average time an I/O takes on the block device
    my $servicetime =
      $total_ios_per_sec ? $utilization / $total_ios_per_sec : 0;

    # Average wait time for an I/O from start to finish
    # (includes queue times et al)
    my $average_wait = $total_ios ? ( $rd_ticks + $wr_ticks ) / $total_ios : 0;
    my $average_rd_wait = $read_ios  ? $rd_ticks / $read_ios  : 0;
    my $average_wr_wait = $write_ios ? $wr_ticks / $write_ios : 0;

    my $average_rq_size_in_kb =
      $total_ios
      ? ( $rd_sectors + $wr_sectors ) * $bytes_per_sector / 1024 / $total_ios
      : 0;
    my $average_rd_rq_size_in_kb =
      $read_ios ? $rd_sectors * $bytes_per_sector / 1024 / $read_ios : 0;
    my $average_wr_rq_size_in_kb =
      $write_ios ? $wr_sectors * $bytes_per_sector / 1024 / $write_ios : 0;

    # msec of busy time per second -> percent
    my $util_print = $utilization / 10;

    if ( $mode eq 'latency' ) {
        print <<EOF;

util.value $util_print
svctm.value $servicetime
avgwait.value $average_wait
avgrdwait.value $average_rd_wait
avgwrwait.value $average_wr_wait

EOF
    }
    elsif ( $mode eq 'throughput' ) {

        print <<EOF;

rdbytes.value $read_bytes_per_sec
wrbytes.value $write_bytes_per_sec

EOF
    }
    elsif ( $mode eq 'iops' ) {

        print <<EOF;

rdio.value $read_io_per_sec
wrio.value $write_io_per_sec
avgrqsz.value $average_rq_size_in_kb
avgrdrqsz.value $average_rd_rq_size_in_kb
avgwrrqsz.value $average_wr_rq_size_in_kb

EOF

    }
    else {
        croak "Unknown mode $mode\n";
    }

    return;
}
431
432
# Read /proc/diskstats and return one array ref per device line, holding the
# first 14 whitespace-separated columns of that line.
#
# Lines with fewer than 14 columns are skipped silently. Lines with more
# columns (newer kernels append further counters) are cut down to the 14
# leading ones, which is all the rest of the script consumes.
#
# Croaks if the file cannot be opened or closed.
sub read_diskstats {

    open my $stat_fh, '<', '/proc/diskstats'
      or croak "Failed to open '/proc/diskstats': $!\n";

    my @lines;

    while ( my $line = <$stat_fh> ) {

        # Strip trailing newline and leading whitespace
        chomp $line;
        $line =~ s/^\s+//;

        my @elems = split /\s+/, $line;

        # We explicitly don't support old-style diskstats
        # There are situations where only _some_ lines (e.g.
        # partitions on older 2.6 kernels) have fewer stats
        # numbers, therefore we'll skip them silently
        next if ( @elems < 14 );

        # Anything beyond the classic 14 columns is not used here
        push @lines, [ @elems[ 0 .. 13 ] ];
    }

    close $stat_fh or croak "Failed to close '/proc/diskstats': $!";
    return @lines;
}
460
461
# Read block device statistics from /sys/block/<device>/stat.
#
# Parameters:
#   $want_device - optional device name; when given only this device is
#                  read, otherwise every /sys/block/*/stat file is.
#
# Returns a list of array refs shaped like the rows of read_diskstats():
# two empty placeholders (major, minor), the device name and the first 11
# values of the stat file. Values beyond the 11th (newer kernels append
# more) are ignored.
#
# Croaks if a stat file cannot be opened or closed, is empty, or holds
# fewer than 11 values.
sub read_sysfs {

    my ($want_device) = @_;

    my @devices;
    my @lines;

    if ( defined $want_device ) {

        # sysfs uses '!' as replacement for '/', e.g. cciss!c0d0
        $want_device =~ tr#/#!#;
        @devices = $want_device;
    }
    else {
        @devices =
          map { m!/sys/block/([^/]+)/stat! } glob "/sys/block/*/stat";
    }

    for my $cur_device (@devices) {
        my $stats_file = "/sys/block/$cur_device/stat";

        open my $stat_fh, '<', $stats_file
          or croak "Failed to open '$stats_file': $!\n";

        my $line = <$stat_fh>;

        croak "'$stats_file' is empty. Aborting"
          if ( not defined $line );

        # Trimming whitespace
        $line =~ s/^\s+//;
        chomp $line;

        my @elems = split /\s+/, $line;

        croak "'$stats_file' doesn't contain at least 11 values. Aborting"
          if ( @elems < 11 );

        # Translate the devicename back before storing the information
        ( my $devname = $cur_device ) =~ tr#!#/#;

        # Faking missing diskstats values
        push @lines, [ '', '', $devname, @elems[ 0 .. 10 ] ];

        close $stat_fh or croak "Failed to close '$stats_file': $!\n";
    }

    return @lines;
}
510
511
512
# Collect the block device statistics into a hash keyed by device name.
#
# Parameters:
#   $want_device - optional device name, handed on to read_sysfs() to
#                  restrict the lookup to that device.
#
# Returns a hash (as a list) mapping each device name to a hash ref with the
# keys major, minor, devname, rd_ios, rd_merges, rd_sectors, rd_ticks,
# wr_ios, wr_merges, wr_sectors, wr_ticks, ios_in_prog, tot_ticks, rq_ticks.
#
# sysfs is used when it offers per-device stat files, /proc/diskstats
# otherwise. Errors croaked by the readers are passed through.
sub parse_diskstats {

    my ($want_device) = @_;

    my @field_names = qw(
      major minor devname
      rd_ios rd_merges rd_sectors rd_ticks
      wr_ios wr_merges wr_sectors wr_ticks
      ios_in_prog tot_ticks rq_ticks
    );

    # glob has to be called in list context here: in scalar context it is
    # an iterator which hands out one match per call and keeps its position
    # between calls, so it is no reliable "does anything match?" test.
    my @sysfs_stat_files = glob "/sys/block/*/stat";

    my @stats =
      @sysfs_stat_files
      ? read_sysfs($want_device)
      : read_diskstats();

    my %diskstats;

    for my $entry (@stats) {

        my %devstat;

        # Hash-Slicing for fun and profit
        @devstat{@field_names} = @{$entry};

        $diskstats{ $devstat{'devname'} } = \%devstat;
    }

    return %diskstats;
}
546
547
# Return the statistics of a single device.
#
# Parameters:
#   $want_device - name of the device to look up
#
# Returns the device's counters as a key/value list (see parse_diskstats()
# for the keys), or an empty list when the device is not among the parsed
# statistics. The empty list (rather than undef) keeps a caller's hash
# assignment from ending up with a bogus one-key hash.
sub fetch_device_counters {

    my ($want_device) = @_;

    my %diskstats = parse_diskstats($want_device);

    return
      unless ( defined $want_device && exists $diskstats{$want_device} );

    return %{ $diskstats{$want_device} };
}
561
562
563
# The script's calling name cannot contain '/', so device names carry a '+'
# in its place there (older installations used '-').
#
# Parameters:
#   $device - device name to convert
#   $mode   - 'TO_FS' turns a real device name into its file-name-safe
#             form, 'FROM_FS' converts back
#
# Returns the converted name; croaks on any other mode.
sub translate_device_name {

    my ($device, $mode) = @_;

    if ($mode eq 'TO_FS') {
        $device =~ tr#/#+#;
        return $device;
    }

    croak "translate_device_name: Unknown mode\n" if $mode ne 'FROM_FS';

    # Hackaround to mitigate issues with unwisely chosen former separator:
    # the dash in device-mapper names (dm-N) has to survive.
    $device =~ tr#-+#//# unless $device =~ m/dm-\d+/;

    return $device;
}
588
589
590
# Install stand-ins for Munin::Plugin's save_state() and restore_state()
# into the current package. They keep the state in a Storable file named
# after the script.
#
#   save_state(@state) - persists the list; croaks when the state file
#                        exists but is not a regular file, or when writing
#                        fails.
#   restore_state()    - returns the persisted list, or an empty list when
#                        there is no (readable) state yet.
#
# NOTE(review): the state file has a predictable name in /tmp and is read
# back with Storable::retrieve - another local user could plant a file
# there. Consider a private state directory.
sub fake_munin_plugin {

    require Storable;

    my $storable_filename = '/tmp/munin-state-' . basename($0);

    # The subs are deliberately put into place at run time
    no warnings qw(once redefine);

    *save_state = sub {
        my @state = @_;

        croak "$storable_filename is probably not a regular file. Please delete it.\n"
          if ( -e $storable_filename and not -f $storable_filename );

        Storable::store( \@state, $storable_filename )
          or croak "Failed to persist state to '$storable_filename': $!\n";

        return;
    };

    *restore_state = sub {

        return unless -f $storable_filename;

        # A truncated or otherwise unreadable file is treated like a
        # missing one; the next save_state() replaces it.
        my $state = eval { Storable::retrieve($storable_filename) };

        return unless ref $state eq 'ARRAY';

        return @{$state};
    };

    return;
}
622
623
# Find a human-friendly name for a device-mapper device.
#
# Parameters:
#   $device - kernel name of the device, "dm-<minor>"
#
# Returns, relative to /dev (callers prefix "/dev/" themselves):
#   - "<vg>/<lv>" when the matching /dev/mapper entry resolves to an
#     existing LVM path,
#   - "mapper/<name>" for any other matching /dev/mapper entry,
#   - $device unchanged when no entry matches.
#
# Croaks if $device does not look like "dm-<number>" or the device-mapper
# major number cannot be determined.
sub translate_devicemapper_name {
    my ($device) = @_;

    my ($want_minor) = $device =~ m/^dm-(\d+)$/;

    croak "Failed to extract devicemapper id" unless defined($want_minor);

    my $dm_major = find_devicemapper_major();
    croak "Failed to get device-mapper major number\n" unless defined $dm_major;

    for my $entry ( glob "/dev/mapper/*" ) {

        my $rdev = ( stat($entry) )[6];

        # stat fails e.g. for dangling symlinks
        next unless defined $rdev;

        # Linux packs major and minor into the device number as
        #   bits  0-7   low 8 bits of the minor
        #   bits  8-19  major
        #   bits 20-31  remaining bits of the minor
        # so a plain "/ 256" and "% 256" breaks for minors above 255.
        my $major = ( $rdev >> 8 ) & 0xfff;
        my $minor = ( $rdev & 0xff ) | ( ( $rdev >> 12 ) & 0xfff00 );

        next unless ( $major == $dm_major && $minor == $want_minor );

        my $pretty_name = translate_lvm_name($entry);
        return $pretty_name if defined $pretty_name;

        # $entry is an absolute path; drop the leading "/dev/" so the name
        # has the same /dev-relative form as the other return values.
        ( my $relative_name = $entry ) =~ s{^/dev/}{};
        return $relative_name;
    }

    # Return original string if the device can't be found.
    return $device;
}
650
651
652
653
# Try to turn a /dev/mapper entry into the matching LVM "<vg>/<lv>" name.
#
# Device-mapper joins volume group and logical volume with a single dash
# and doubles every dash that is part of either name.
#
# Parameters:
#   $entry - path of the /dev/mapper entry
#
# Returns "<vg>/<lv>" if the name contains a single dash and /dev/<vg>/<lv>
# exists, undef otherwise.
sub translate_lvm_name {

    my ($entry) = @_;

    my $mapper_name = basename($entry);

    # A dash that has no other dash next to it
    my $lone_dash = qr/(?<!-)-(?!-)/;

    # Without such a dash this cannot be a lvm devicemapper device.
    return undef unless $mapper_name =~ m/$lone_dash/;

    # split device name into vg and lv parts
    my ( $vg, $lv ) = split /$lone_dash/, $mapper_name, 2;
    return undef unless ( defined($vg) && defined($lv) );

    # remove extraneous dashes from vg and lv names
    s/--/-/g for ( $vg, $lv );

    my $lvm_name = "$vg/$lv";

    # Sanity check - does the constructed device name exist?
    return stat("/dev/$lvm_name") ? "$lvm_name" : undef;
}
680
681
# Look up the major device number registered for "device-mapper" in
# /proc/devices.
#
# Returns the major number, or undef when there is no such entry.
# Croaks if /proc/devices cannot be opened.
sub find_devicemapper_major {

    open my $devices_fh, '<', '/proc/devices'
      or croak "Failed to open '/proc/devices': $!";

    my $dm_major;

    while ( my $line = <$devices_fh> ) {
        chomp $line;

        # Splitting on ' ' also discards leading whitespace, which would
        # otherwise end up as an empty first field on padded lines.
        my ( $major, $name ) = split ' ', $line, 2;

        # Section headers and blank lines have no second field
        next unless defined $name;

        if ( $name eq 'device-mapper' ) {
            $dm_major = $major;
            last;
        }
    }
    close $devices_fh;

    return $dm_major;
}