Projet

Général

Profil

Paste
Télécharger au format
Statistiques
| Branche: | Révision:

root / plugins / disk / linux_diskstat_ @ d24215d0

Historique | Voir | Annoter | Télécharger (16,4 ko)

1
#!/usr/bin/perl -w
2
# vim: sts=4 sw=4 ts=8
3

    
4
# Munin markers:
5
#%# family=auto
6
#%# capabilities=autoconf suggest
7

    
8
# Author: Michael Renner <michael.renner@amd.co.at>
9

    
10
# Version: 0.0.5, 2009-05-22
11

    
12

    
13

    
14
=head1 NAME
15

    
16
linux_diskstat_ - Munin plugin to monitor various values provided
17
via C</proc/diskstats>
18

    
19
=head1 APPLICABLE SYSTEMS
20

    
21
Linux 2.6 systems with extended block device statistics enabled.
22

    
23

    
24
=head1 INTERPRETATION
25

    
26
Among the more self-describing or well-known values like C<throughput>
27
(Bytes per second) there are a few which might need further introduction.
28

    
29

    
30
=head2 Device Utilization
31

    
32
Linux provides a counter which increments in a millisecond-interval for as long
33
as there are outstanding I/O requests. If this counter is close to 1000msec
34
in a given 1 second timeframe the device is nearly 100% saturated. This plugin
35
provides values averaged over a 5 minute time frame per default, so it can't
36
catch short-lived saturations, but it'll give a nice trend for semi-uniform
37
load patterns as they're expected in most server or multi-user environments.
38

    
39

    
40
=head2 Device IO Time
41

    
42
The C<Device IO Time> takes the counter described under C<Device Utilization>
43
and divides it by the number of I/Os that happened in the given time frame,
44
resulting in an average time per I/O on the block-device level.
45

    
46
This value can give you a good comparison base amongst different controllers,
47
storage subsystems and disks for similar workloads.
48

    
49

    
50
=head2 Syscall Wait Time
51

    
52
These values describe the average time it takes between an application issuing
53
a syscall resulting in a hit to a blockdevice to the syscall returning to the
54
application.
55

    
56
The values are bound to be higher (at least for read requests) than the time
57
it takes the device itself to fulfill the requests, since calling overhead,
58
queuing times and probably a dozen other things are included in those times.
59

    
60
These are the values to watch out for when a user complains that C<the disks
61
are too slow!>.
62

    
63

    
64
=head3 What causes a block device hit?
65

    
66
A non-exhaustive list:
67

    
68
=over
69

    
70
=item * Reads from files when the given range is not in the page cache or the O_DIRECT
71
flag is set.
72

    
73
=item * Writes to files if O_DIRECT or O_SYNC is set or sys.vm.dirty_(background_)ratio
74
is exceeded.
75

    
76
=item * Filesystem metadata operations (stat(2), getdents(2), file creation,
77
modification of any of the values returned by stat(2), etc.)
78

    
79
=item * The pdflush daemon writing out dirtied pages
80

    
81
=item * (f)sync
82

    
83
=item * Swapping
84

    
85
=item * raw device I/O (mkfs, dd, etc.)
86

    
87
=back
88

    
89
=head1 ACKNOWLEDGEMENTS
90

    
91
The core logic of this script is based on the B<iostat> tool of the B<sysstat>
92
package written and maintained by Sebastien Godard.
93

    
94
=head1 SEE ALSO
95

    
96
See C<Documentation/iostats.txt> in your Linux source tree for further information
97
about the C<numbers> involved in this module.
98

    
99
L<http://www.westnet.com/~gsmith/content/linux-pdflush.htm> has a nice writeup
100
about the pdflush daemon.
101

    
102
=head1 AUTHOR
103

    
104
Michael Renner <michael.renner@amd.co.at>
105

    
106
=head1 LICENSE
107

    
108
GPLv2
109

    
110

    
111
=cut
112

    
113

    
114
use strict;
115

    
116

    
117
use File::Basename;
118
use Carp;
119
use POSIX;
120

    
121
# Prefer the real Munin::Plugin for save_state()/restore_state(). When it
# cannot be loaded, fall back to the replacements installed by
# fake_munin_plugin().
# Don't try this at home
eval {
    require Munin::Plugin;
    Munin::Plugin->import;
};

fake_munin_plugin() if $@;
128

    
129

    
130
# Refuse to run unless the invocation name carries the expected prefix.
die qq(Please ensure that the name of the script and it's symlinks starts with "linux_diskstat_"\n)
  unless basename($0) =~ /^linux_diskstat_/;
135

    
136

    
137
############
# autoconf #
############

# Tell munin-node-configure whether the plugin can work on this host:
# "yes" when at least one block device with statistics was found,
# "no (reason)" otherwise.
if ( defined $ARGV[0] && $ARGV[0] eq 'autoconf' ) {
    my %stats;

    # parse_diskstats() croaks when the statistics can't be read; capture
    # that so it can be reported as the reason.
    eval { %stats = parse_diskstats() };
    my $error = $@;

    if ( !$error && keys %stats ) {
        print "yes\n";
    }
    else {
        my $reason = 'no block device statistics found';

        if ($error) {

            # Only use the first line of the error, the answer has to stay
            # on a single line.
            my ($first_line) = split /\n/, $error;
            $reason = $first_line
              if ( defined $first_line && length $first_line );
        }

        print "no ($reason)\n";
    }

    # Munin expects autoconf to exit 0 for both answers; a non-zero exit
    # status is treated as a failure of the plugin itself.
    exit 0;
}
157

    
158

    
159
###########
# suggest #
###########

# Print one "<mode>_<device>" line for every graph worth creating.
if ( defined $ARGV[0] && $ARGV[0] eq 'suggest' ) {

    my %diskstats = parse_diskstats();

    my %suggested_devices;

  DEVICE:
    for my $devname ( sort keys %diskstats ) {

        # Skip devices without traffic
        next DEVICE
          if ( $diskstats{$devname}->{'rd_ios'} == 0
            && $diskstats{$devname}->{'wr_ios'} == 0 );

        for my $existing_device ( @{ $suggested_devices{'iops'} } ) {

            # Filter out partitions of devices which are already suggested,
            # e.g. sda1 -> sda, c0d0p1 -> c0d0. Devices whose name ends in a
            # digit carry a 'p' in front of the partition number.
            # The name is matched literally and over its full length, so
            # that unrelated devices which merely share a prefix
            # (sda/sdaa, md1/md10) are not dropped.
            my $separator = $existing_device =~ m/\d\z/ ? 'p' : '';

            next DEVICE
              if ( $devname =~ m/\A\Q$existing_device\E$separator\d+\z/ );
        }

        push @{ $suggested_devices{'iops'} },       $devname;
        push @{ $suggested_devices{'throughput'} }, $devname;

        # Only suggest latency graphs if the device supports it
        if (   $diskstats{$devname}->{'rd_ticks'} > 0
            || $diskstats{$devname}->{'wr_ticks'} > 0 )
        {
            push @{ $suggested_devices{'latency'} }, $devname;
        }
    }

    for my $mode ( keys %suggested_devices ) {
        for my $device ( sort @{ $suggested_devices{$mode} } ) {

            # '/' can't be part of a file name, translate it for the symlink
            my $printdev = translate_device_name( $device, 'TO_FS' );
            print "${mode}_$printdev\n";
        }
    }

    exit 0;
}
205

    
206

    
207
# Reading the script's invocation name and setting some parameters,
# needed from here on
my $basename = basename($0);

# The mode never contains an underscore, so it ends at the first underscore
# after the prefix; everything behind that belongs to the device name. This
# keeps device names containing underscores intact
# (e.g. "iops_md_d0" -> mode "iops", device "md_d0").
my ( $mode, $device ) =
  $basename =~ m/linux_diskstat_([^\W_]+)_([-+\w]+)$/;

if ( not defined $device ) {
    croak "Didn't get a device name. Aborting\n";
}

# Turn the file-name-safe spelling back into the kernel's device name
$device = translate_device_name( $device, 'FROM_FS' );
218

    
219
##########
# config #
##########

# Emit the Munin graph definition which belongs to the mode given in the
# script name, then exit.
if ( defined $ARGV[0] && $ARGV[0] eq 'config' ) {

    # Device-mapper nodes (dm-N) get a friendlier name in the graph title
    my $pretty_device =
      $device =~ /^dm-\d+$/
      ? translate_devicemapper_name($device)
      : $device;

    # One graph definition per supported mode
    my %config_for = (
        latency => <<"EOF_LATENCY",
graph_title Disk latency for /dev/$pretty_device
graph_args --base 1000
graph_category disk

util.label Device utilization (percent)
util.type GAUGE
util.info Utilization of the device. If the time spent for I/O is close to 1000msec for a given second, the device is nearly 100% saturated.
util.min 0
svctm.label Average device IO time (ms)
svctm.type GAUGE
svctm.info Average time an I/O takes on the block device
svctm.min 0
avgwait.label Average IO Wait time (ms)
avgwait.type GAUGE
avgwait.info Average wait time for an I/O from request start to finish (includes queue times et al)
avgwait.min 0
avgrdwait.label Average Read IO Wait time (ms)
avgrdwait.type GAUGE
avgrdwait.info Average wait time for a read I/O from request start to finish (includes queue times et al)
avgrdwait.min 0
avgwrwait.label Average Write IO Wait time (ms)
avgwrwait.type GAUGE
avgwrwait.info Average wait time for a write I/O from request start to finish (includes queue times et al)
avgwrwait.min 0

EOF_LATENCY
        throughput => <<"EOF_THROUGHPUT",
graph_title Disk throughput for /dev/$pretty_device
graph_args --base 1024
graph_vlabel Bytes/second
graph_category disk

rdbytes.label Read Bytes
rdbytes.type GAUGE
rdbytes.min 0
wrbytes.label Write Bytes
wrbytes.type GAUGE
wrbytes.min 0

EOF_THROUGHPUT
        iops => <<"EOF_IOPS",
graph_title Disk IOs for /dev/$pretty_device
graph_args --base 1000
graph_vlabel Units/second
graph_category disk

rdio.label Read IO/sec
rdio.type GAUGE
rdio.min 0
wrio.label Write IO/sec
wrio.type GAUGE
wrio.min 0
avgrqsz.label Average Request Size (KiB)
avgrqsz.type GAUGE
avgrqsz.min 0
avgrdrqsz.label Average Read Request Size (KiB)
avgrdrqsz.type GAUGE
avgrdrqsz.min 0
avgwrrqsz.label Average Write Request Size (KiB)
avgwrrqsz.type GAUGE
avgwrrqsz.min 0

EOF_IOPS
    );

    croak "Unknown mode $mode\n" unless exists $config_for{$mode};

    print $config_for{$mode};
    exit 0;
}
311

    
312

    
313
########
# MAIN #
########

# Fetch the current counters, swap them with the state saved by the previous
# run and print the values derived from the difference between the two.

my @cur_counters = fetch_device_counters($device);

# fetch_device_counters() doesn't hand back any key/value pair for a device
# it can't find. Stop right here, so that neither bogus state gets saved nor
# garbage values get reported.
croak "No statistics found for device '$device'. Aborting\n"
  if ( @cur_counters < 2 );

my %cur_diskstat = @cur_counters;

my ( $prev_time, %prev_diskstat ) = restore_state();

save_state( time(), %cur_diskstat );

# Probably the first run for the given device, we need state to do our job,
# so let's wait for the next run.
exit if ( not defined $prev_time or not %prev_diskstat );

calculate_and_print_values( $prev_time, \%prev_diskstat, \%cur_diskstat );
330

    
331

    
332

    
333
########
334
# SUBS #
335
########
336

    
337
# Derive per-second rates, wait times and request sizes from two snapshots
# of a device's counters and print the values of the current graph mode.
#
# Arguments:
#   $prev_time  - epoch seconds at which the previous snapshot was saved
#   $prev_stats - hash reference holding the previous counters
#   $cur_stats  - hash reference holding the current counters
#
# The file-level $mode selects which values are printed; an unknown mode
# makes the sub croak. If no time has passed since the previous snapshot no
# rate can be calculated, so a warning is issued and nothing is printed.
sub calculate_and_print_values {
    my ( $prev_time, $prev_stats, $cur_stats ) = @_;

    my $bytes_per_sector = 512;

    my $interval = time() - $prev_time;

    # Every rate below is divided by the interval; running the plugin twice
    # within the same second would otherwise die with a division by zero.
    if ( $interval <= 0 ) {
        carp "No time has passed since the previous run, "
          . "can't calculate any rates";
        return;
    }

    my $read_ios  = $cur_stats->{'rd_ios'} - $prev_stats->{'rd_ios'};
    my $write_ios = $cur_stats->{'wr_ios'} - $prev_stats->{'wr_ios'};

    my $rd_ticks = $cur_stats->{'rd_ticks'} - $prev_stats->{'rd_ticks'};
    my $wr_ticks = $cur_stats->{'wr_ticks'} - $prev_stats->{'wr_ticks'};

    my $rd_sectors = $cur_stats->{'rd_sectors'} - $prev_stats->{'rd_sectors'};
    my $wr_sectors = $cur_stats->{'wr_sectors'} - $prev_stats->{'wr_sectors'};

    my $tot_ticks = $cur_stats->{'tot_ticks'} - $prev_stats->{'tot_ticks'};

    my $read_io_per_sec  = $read_ios / $interval;
    my $write_io_per_sec = $write_ios / $interval;

    my $read_bytes_per_sec  = $rd_sectors / $interval * $bytes_per_sector;
    my $write_bytes_per_sec = $wr_sectors / $interval * $bytes_per_sector;

    my $total_ios         = $read_ios + $write_ios;
    my $total_ios_per_sec = $total_ios / $interval;

    # Utilization - or "how busy is the device"?
    # If the time spent for I/O was close to 1000msec for
    # a given second, the device is nearly 100% saturated.
    my $utilization = $tot_ticks / $interval;

    # Average time an I/O takes on the block device
    my $servicetime =
      $total_ios_per_sec ? $utilization / $total_ios_per_sec : 0;

    # Average wait time for an I/O from start to finish
    # (includes queue times et al)
    my $average_wait = $total_ios ? ( $rd_ticks + $wr_ticks ) / $total_ios : 0;
    my $average_rd_wait = $read_ios  ? $rd_ticks / $read_ios  : 0;
    my $average_wr_wait = $write_ios ? $wr_ticks / $write_ios : 0;

    # Request sizes; the 0 fallback avoids dividing by zero on idle devices
    my $average_rq_size_in_kb =
      $total_ios
      ? ( $rd_sectors + $wr_sectors ) * $bytes_per_sector / 1024 / $total_ios
      : 0;
    my $average_rd_rq_size_in_kb =
      $read_ios ? $rd_sectors * $bytes_per_sector / 1024 / $read_ios : 0;
    my $average_wr_rq_size_in_kb =
      $write_ios ? $wr_sectors * $bytes_per_sector / 1024 / $write_ios : 0;

    # msec busy per second -> percent
    my $util_print = $utilization / 10;

    if ( $mode eq 'latency' ) {
        print <<EOF;

util.value $util_print
svctm.value $servicetime
avgwait.value $average_wait
avgrdwait.value $average_rd_wait
avgwrwait.value $average_wr_wait

EOF
    }
    elsif ( $mode eq 'throughput' ) {

        print <<EOF;

rdbytes.value $read_bytes_per_sec
wrbytes.value $write_bytes_per_sec

EOF
    }
    elsif ( $mode eq 'iops' ) {

        print <<EOF;

rdio.value $read_io_per_sec
wrio.value $write_io_per_sec
avgrqsz.value $average_rq_size_in_kb
avgrdrqsz.value $average_rd_rq_size_in_kb
avgwrrqsz.value $average_wr_rq_size_in_kb

EOF

    }
    else {
        croak "Unknown mode $mode\n";
    }

    return;
}
431

    
432
# Read '/proc/diskstats'.
#
# Returns a list with one array reference per usable line, each holding the
# first 14 whitespace-separated fields of that line. Lines with fewer fields
# are skipped. Croaks if the file can't be opened or closed.
sub read_diskstats {

    open my $stat_fh, '<', '/proc/diskstats'
      or croak "Failed to open '/proc/diskstats': $!\n";

    my @lines;

    while ( my $line = <$stat_fh> ) {

        # Strip trailing newline and leading whitespace
        chomp $line;
        $line =~ s/^\s+//;

        my @elems = split /\s+/, $line;

        # We explicitly don't support old-style diskstats
        # There are situations where only _some_ lines (e.g.
        # partitions on older 2.6 kernels) have fewer stats
        # numbers, therefore we'll skip them silently
        next if ( @elems < 14 );

        # Newer kernels append further fields to each line. Only the first
        # 14 are used, so cut the line down instead of discarding it.
        push @lines, [ @elems[ 0 .. 13 ] ];
    }

    close $stat_fh or croak "Failed to close '/proc/diskstats': $!";
    return @lines;
}
460

    
461
# Read the statistics from '/sys/block/<device>/stat'.
#
# Arguments:
#   $want_device - optional device name; when omitted every device found
#                  below /sys/block is read
#
# Returns a list of array references laid out like the lines handed back by
# read_diskstats(): two empty placeholders, the device name and the first
# 11 values of the stat file. Croaks if a stat file can't be opened, can't
# be closed, is empty or holds fewer than 11 values.
sub read_sysfs {

    my ($want_device) = @_;

    my @devices;
    my @lines;

    if ( defined $want_device ) {

        # sysfs uses '!' as replacement for '/', e.g. cciss!c0d0
        $want_device =~ tr#/#!#;
        @devices = $want_device;
    }
    else {
        @devices = glob "/sys/block/*/stat";
        @devices = map { m!/sys/block/([^/]+)/stat! } @devices;
    }

    for my $cur_device (@devices) {
        my $stats_file = "/sys/block/$cur_device/stat";

        open my $stat_fh, '<', $stats_file
          or croak "Failed to open '$stats_file': $!\n";

        my $line = <$stat_fh>;

        close $stat_fh or croak "Failed to close '$stats_file': $!\n";

        croak "'$stats_file' is empty. Aborting" unless defined $line;

        # Trimming whitespace
        $line =~ s/^\s+//;
        chomp $line;

        my @elems = split /\s+/, $line;

        # Newer kernels append further values to the stat file; only the
        # first 11 are needed, so more than that is fine.
        croak "'$stats_file' contains fewer than 11 values. Aborting"
          if ( @elems < 11 );

        # Translate the devicename back before storing the information.
        # Done on a copy, the loop variable is an alias into @devices.
        ( my $devname = $cur_device ) =~ tr#!#/#;

        # Faking missing diskstats values (major and minor number)
        push @lines, [ '', '', $devname, @elems[ 0 .. 10 ] ];
    }

    return @lines;
}
510

    
511

    
512
# Collect the block device statistics and key them by device name.
#
# Arguments:
#   $want_device - optional device name, handed on to read_sysfs(). It is
#                  not used when the data comes from read_diskstats().
#
# Returns a hash (as a list) which maps each device name to a hash reference
# holding the named counters of that device.
sub parse_diskstats {

    my ($want_device) = @_;

    # Evaluate glob in list context: in boolean context it acts as an
    # iterator which keeps its position between calls and returns undef once
    # it is exhausted, so a repeated call could take the wrong branch.
    my @sysfs_stat_files = glob "/sys/block/*/stat";

    my @stats =
      @sysfs_stat_files
      ? read_sysfs($want_device)
      : read_diskstats();

    # Names of the columns, in the order they appear in each entry
    my @columns = qw(major minor devname
      rd_ios rd_merges rd_sectors rd_ticks
      wr_ios wr_merges wr_sectors wr_ticks
      ios_in_prog tot_ticks rq_ticks);

    my %diskstats;

    for my $entry (@stats) {

        my %devstat;

        # Hash-Slicing for fun and profit
        @devstat{@columns} = @{$entry};

        $diskstats{ $devstat{'devname'} } = \%devstat;
    }

    return %diskstats;
}
546

    
547
# Fetch the counters of a single device.
#
# Arguments:
#   $want_device - name of the device
#
# Returns the counters as a key/value list. If the device is unknown an
# empty list is returned (undef in scalar context).
sub fetch_device_counters {

    my ($want_device) = @_;

    my %diskstats = parse_diskstats($want_device);

    # A bare return, so that assigning the result to a hash yields an empty
    # hash instead of one built from a lone undef.
    return
      unless ( defined $want_device && exists $diskstats{$want_device} );

    return %{ $diskstats{$want_device} };
}
561

    
562

    
563
# Convert a device name between the kernel's spelling and the spelling used
# in the calling name of the script, where '+' (and formerly '-') stands in
# for '/'.
#
# Arguments:
#   $device - the device name to convert
#   $mode   - 'TO_FS' (kernel name -> script name) or
#             'FROM_FS' (script name -> kernel name)
#
# Returns the converted name, croaks on any other mode.
sub translate_device_name {

    my ($device, $mode) = @_;

    if ($mode eq 'TO_FS') {
        $device =~ tr#/#+#;
        return $device;
    }

    croak "translate_device_name: Unknown mode\n" if $mode ne 'FROM_FS';

    # Hackaround to mitigate issues with unwisely chosen former separator:
    # the dash in devicemapper names (dm-N) has to stay as it is
    $device =~ tr#-+#//# unless $device =~ m/dm-\d+/;

    return $device;
}
588

    
589

    
590
# Install replacements for save_state() and restore_state(), for use when
# Munin::Plugin can't be loaded. The state is kept with Storable in
# "/tmp/munin-state-<name of the script>".
#
# Croaks if the replacements can't be set up.
#
# NOTE(review): the state file has a predictable name in /tmp and is read
# back with Storable's retrieve() - worth confirming that this is acceptable
# on hosts with untrusted local users.
sub fake_munin_plugin {
    my $eval_code = <<'EOF';

use Storable;
my $storable_filename = basename($0);
$storable_filename = "/tmp/munin-state-$storable_filename";

sub save_state {
    my @state = @_;

    if ( not -e $storable_filename or -f $storable_filename ) {
        store \@state, $storable_filename or croak "Failed to persist state to '$storable_filename': $!\n";
    }
    else {
        croak "$storable_filename is probably not a regular file. Please delete it.\n";
    }
}

sub restore_state {

    if (-f $storable_filename) {
        my $state = retrieve($storable_filename);
        return @{$state};
    }
    else {
        return undef;
    }
}
EOF

    eval($eval_code);

    # Don't swallow a failure (e.g. Storable being unavailable): it would
    # only show up later on as a call to an undefined subroutine.
    croak "Failed to set up the fallback state handling: $@" if $@;

    return;
}
622

    
623
# Find a more readable name for a devicemapper device.
#
# Arguments:
#   $device - kernel name of the device, e.g. "dm-3"
#
# Returns a name relative to /dev: the name found by translate_lvm_name()
# if there is one, otherwise "mapper/<name>", or the original name if no
# entry below /dev/mapper belongs to the device.
# Croaks if $device isn't of the form dm-N or if the major number of the
# devicemapper can't be determined.
sub translate_devicemapper_name {
    my ($device) = @_;

    my ($want_minor) = $device =~ m/^dm-(\d+)$/;

    croak "Failed to extract devicemapper id" unless defined($want_minor);

    my $dm_major = find_devicemapper_major();
    croak "Failed to get device-mapper major number\n" unless defined $dm_major;

    for my $entry ( glob "/dev/mapper/*" ) {

        # Skip entries which can't be stat'ed (e.g. dangling symlinks)
        my $rdev = ( stat($entry) )[6];
        next unless defined $rdev;

        # Decode the device number. The minor number is split in two parts:
        # its low 8 bits sit below the major number, the remaining bits
        # above it. Dividing by 256 is only right for minors up to 255.
        my $major = ( $rdev >> 8 ) & 0xfff;
        my $minor = ( $rdev & 0xff ) | ( ( $rdev >> 12 ) & 0xfff00 );

        next unless ( $major == $dm_major && $minor == $want_minor );

        my $pretty_name = translate_lvm_name($entry);
        return $pretty_name if defined $pretty_name;

        # The result gets prefixed with "/dev/" when it is printed, so
        # return the entry relative to /dev, like the other return values.
        ( my $relative_name = $entry ) =~ s{^/dev/}{};
        return $relative_name;
    }

    # Return original string if the device can't be found.
    return $device;
}
650

    
651

    
652

    
653
# Turn the name of a devicemapper entry into "<vg>/<lv>" if it looks like a
# LVM volume.
#
# Arguments:
#   $entry - path of the entry, only its last component is looked at
#
# Returns "<vg>/<lv>" if the name holds a single dash and "/dev/<vg>/<lv>"
# exists, undef in any other case.
sub translate_lvm_name {

    my ($entry) = @_;

    my $mapper_name = basename($entry);

    # Without a dash standing on its own this can't be a LVM devicemapper
    # device.
    return undef if $mapper_name !~ m/(?<!-)-(?!-)/;

    # The first single dash separates the vg part from the lv part
    my ( $vg, $lv ) = split /(?<!-)-(?!-)/, $mapper_name, 2;
    return undef unless ( defined($vg) && defined($lv) );

    # Doubled dashes stand for a single dash inside of the vg and lv names
    s/--/-/g for ( $vg, $lv );

    my $candidate = "$vg/$lv";

    # Sanity check - only hand out the name if such a device really exists
    return stat("/dev/$candidate") ? "$candidate" : undef;
}
680

    
681
# Look up the major number of the devicemapper in '/proc/devices'.
#
# Returns the major number, or undef if there is no 'device-mapper' entry.
# Croaks if the file can't be opened.
sub find_devicemapper_major {

    open my $devices_fh, '<', '/proc/devices'
      or croak "Failed to open '/proc/devices': $!";

    my $dm_major;

    while ( my $line = <$devices_fh> ) {
        chomp $line;

        # split ' ' skips leading whitespace. With /\s+/ an indented line
        # would yield an empty first field and the number would end up in
        # the name.
        my ( $major, $name ) = split ' ', $line, 2;

        # Headings and empty lines
        next unless defined $name;

        if ( $name eq 'device-mapper' ) {
            $dm_major = $major;
            last;
        }
    }

    close $devices_fh;

    return $dm_major;
}