Projet

Général

Profil

Paste
Télécharger au format
Statistiques
| Branche: | Révision:

root / plugins / disk / linux_diskstat_ @ 64089240

Historique | Voir | Annoter | Télécharger (16,4 ko)

1
#!/usr/bin/perl -w
2
# vim: sts=4 sw=4 ts=8
3

    
4
# Munin markers:
5
#%# family=auto
6
#%# capabilities=autoconf suggest
7

    
8
# Author: Michael Renner <michael.renner@amd.co.at>
9

    
10
# Version: 0.0.5, 2009-05-22
11

    
12

    
13

    
14
=head1 NAME
15

    
16
linux_diskstat_ - Munin plugin to monitor various values provided
17
via C</proc/diskstats>
18

    
19
=head1 APPLICABLE SYSTEMS
20

    
21
Linux 2.6 systems with extended block device statistics enabled.
22

    
23

    
24
=head1 INTERPRETATION
25

    
26
Among the more self-describing or well-known values like C<throughput>
27
(Bytes per second) there are a few which might need further introduction.
28

    
29

    
30
=head2 Device Utilization
31

    
32
Linux provides a counter which increments in a millisecond-interval for as long
33
as there are outstanding I/O requests. If this counter is close to 1000msec
34
in a given 1 second timeframe the device is nearly 100% saturated. This plugin
35
provides values averaged over a 5 minute time frame per default, so it can't
36
catch short-lived saturations, but it'll give a nice trend for semi-uniform
37
load patterns as they're expected in most server or multi-user environments.
38

    
39

    
40
=head2 Device IO Time
41

    
42
The C<Device IO Time> takes the counter described under C<Device Utilization>
43
and divides it by the number of I/Os that happened in the given time frame,
44
resulting in an average time per I/O on the block-device level.
45

    
46
This value can give you a good comparison base amongst different controllers,
47
storage subsystems and disks for similar workloads.
48

    
49

    
50
=head2 Syscall Wait Time
51

    
52
These values describe the average time it takes between an application issuing
53
a syscall resulting in a hit to a blockdevice to the syscall returning to the
54
application.
55

    
56
The values are bound to be higher (at least for read requests) than the time
57
it takes the device itself to fulfill the requests, since calling overhead,
58
queuing times and probably a dozen other things are included in those times.
59

    
60
These are the values to watch out for when a user complains that C<the disks
61
are too slow!>.
62

    
63

    
64
=head3 What causes a block device hit?
65

    
66
A non-exhaustive list:
67

    
68
=over
69

    
70
=item * Reads from files when the given range is not in the page cache or the O_DIRECT
71
flag is set.
72

    
73
=item * Writes to files if O_DIRECT or O_SYNC is set or sys.vm.dirty_(background_)ratio
74
is exceeded.
75

    
76
=item * Filesystem metadata operations (stat(2), getdents(2), file creation,
77
modification of any of the values returned by stat(2), etc.)
78

    
79
=item * The pdflush daemon writing out dirtied pages
80

    
81
=item * (f)sync
82

    
83
=item * Swapping
84

    
85
=item * raw device I/O (mkfs, dd, etc.)
86

    
87
=back
88

    
89
=head1 ACKNOWLEDGEMENTS
90

    
91
The core logic of this script is based on the B<iostat> tool of the B<sysstat>
92
package written and maintained by Sebastien Godard.
93

    
94
=head1 SEE ALSO
95

    
96
See C<Documentation/iostats.txt> in your Linux source tree for further information
97
about the C<numbers> involved in this module.
98

    
99
L<http://www.westnet.com/~gsmith/content/linux-pdflush.htm> has a nice writeup
100
about the pdflush daemon.
101

    
102
=head1 AUTHOR
103

    
104
Michael Renner <michael.renner@amd.co.at>
105

    
106
=head1 LICENSE
107

    
108
GPLv2
109

    
110

    
111
=cut
112

    
113

    
114
use strict;
115

    
116

    
117
use File::Basename;
118
use Carp;
119
use POSIX;
120

    
121
# We load our own version of save/restore_state if Munin::Plugin is unavailable.
122
# Don't try this at home
123
# Try to pull in the real Munin::Plugin module. If loading it dies, install
# the bundled stand-ins for save_state()/restore_state() instead.
eval {
    require Munin::Plugin;
    Munin::Plugin->import;
};
fake_munin_plugin() if $@;
128

    
129

    
130
# Sanity check to ensure that the script is called the correct name.
131

    
132
# Mode and device are derived from the invocation name later on, so refuse
# to run under a name that does not carry the expected prefix.
die qq(Please ensure that the name of the script and it's symlinks starts with "linux_diskstat_"\n)
  unless basename($0) =~ /^linux_diskstat_/;
135

    
136

    
137
############
138
# autoconf #
139
############
140

    
141
if ( defined $ARGV[0] && $ARGV[0] eq 'autoconf' ) {

    # parse_diskstats() croaks when the statistics cannot be read; such a
    # failure is answered with "no" instead of being propagated.
    my %probe = eval { parse_diskstats() };

    # Usable only if parsing succeeded and at least one device was found.
    my $usable = !$@ && keys %probe;

    print $usable ? "yes\n" : "no\n";
    exit 0;
}
156

    
157

    
158
###########
159
# suggest #
160
###########
161

    
162
if ( defined $ARGV[0] && $ARGV[0] eq 'suggest' ) {

    # Prints one "<mode>_<device>" line per graph worth creating and exits.
    my %diskstats = parse_diskstats();

    my %suggested_devices;

  DEVICE:
    for my $devname ( sort keys %diskstats ) {

        my $stats = $diskstats{$devname};

        # Skip devices without traffic
        next DEVICE
          if ( $stats->{'rd_ios'} == 0 && $stats->{'wr_ios'} == 0 );

        for my $existing_device ( @{ $suggested_devices{'iops'} } ) {

            # Filter out partitions of an already suggested device,
            # e.g. sda1 -> sda, c0d0p1 -> c0d0.
            # Linux names partitions "<disk><n>", or "<disk>p<n>" when the
            # disk name itself ends in a digit. Matching exactly that shape
            # (anchored, with the disk name quoted) keeps unrelated devices
            # such as md10 vs. md1, dm-10 vs. dm-1 or sdaa vs. sda.
            my $separator = $existing_device =~ m/\d$/ ? 'p' : '';

            next DEVICE
              if ( $devname =~ m/^\Q$existing_device\E${separator}\d+$/ );
        }

        push @{ $suggested_devices{'iops'} },       $devname;
        push @{ $suggested_devices{'throughput'} }, $devname;

        # Only suggest latency graphs if the device supports it
        if ( $stats->{'rd_ticks'} > 0 || $stats->{'wr_ticks'} > 0 ) {
            push @{ $suggested_devices{'latency'} }, $devname;
        }
    }

    for my $mode ( keys %suggested_devices ) {
        for my $device ( sort @{ $suggested_devices{$mode} } ) {

            # '/' cannot appear in a file name, so encode it for the symlink
            my $printdev = translate_device_name( $device, 'TO_FS' );
            print "${mode}_$printdev\n";
        }
    }

    exit 0;
}
204

    
205

    
206
# Reading the scripts invocation name and setting some parameters,
207
# needed from here on
208

    
209
my $basename = basename($0);

# The name has the form linux_diskstat_<mode>_<device>. The mode is matched
# non-greedily so that everything after the first separator belongs to the
# device; otherwise a device name containing '_' (e.g. md_d0) would be split
# in the wrong place and part of it would end up in the mode.
my ( $mode, $device ) = $basename =~ m/linux_diskstat_(\w+?)_([-+\w]+)$/;

if ( not defined $device ) {
    croak "Didn't get a device name. Aborting\n";
}

# Undo the '/' -> '+' encoding used in the file name
$device = translate_device_name( $device, 'FROM_FS' );
217

    
218
##########
219
# config #
220
##########
221

    
222
if ( defined $ARGV[0] && $ARGV[0] eq 'config' ) {

    # Device-mapper nodes (dm-N) are shown under their friendlier name;
    # every other device is shown as is.
    my $pretty_device =
      $device =~ /^dm-\d+$/
      ? translate_devicemapper_name($device)
      : $device;

    # Graph definition per supported mode
    my %config_for;

    $config_for{'latency'} = <<EOF;
graph_title Disk latency for /dev/$pretty_device
graph_args --base 1000
graph_category disk

util.label Device utilization (percent)
util.type GAUGE
util.info Utilization of the device. If the time spent for I/O is close to 1000msec for a given second, the device is nearly 100% saturated.
util.min 0
svctm.label Average device IO time (ms)
svctm.type GAUGE
svctm.info Average time an I/O takes on the block device
svctm.min 0
avgwait.label Average IO Wait time (ms)
avgwait.type GAUGE
avgwait.info Average wait time for an I/O from request start to finish (includes queue times et al)
avgwait.min 0
avgrdwait.label Average Read IO Wait time (ms)
avgrdwait.type GAUGE
avgrdwait.info Average wait time for a read I/O from request start to finish (includes queue times et al)
avgrdwait.min 0
avgwrwait.label Average Write IO Wait time (ms)
avgwrwait.type GAUGE
avgwrwait.info Average wait time for a write I/O from request start to finish (includes queue times et al)
avgwrwait.min 0

EOF

    $config_for{'throughput'} = <<EOF;
graph_title Disk throughput for /dev/$pretty_device
graph_args --base 1024
graph_vlabel Bytes/second
graph_category disk

rdbytes.label Read Bytes
rdbytes.type GAUGE
rdbytes.min 0
wrbytes.label Write Bytes
wrbytes.type GAUGE
wrbytes.min 0

EOF

    $config_for{'iops'} = <<EOF;
graph_title Disk IOs for /dev/$pretty_device
graph_args --base 1000
graph_vlabel Units/second
graph_category disk

rdio.label Read IO/sec
rdio.type GAUGE
rdio.min 0
wrio.label Write IO/sec
wrio.type GAUGE
wrio.min 0
avgrqsz.label Average Request Size (KiB)
avgrqsz.type GAUGE
avgrqsz.min 0
avgrdrqsz.label Average Read Request Size (KiB)
avgrdrqsz.type GAUGE
avgrdrqsz.min 0
avgwrrqsz.label Average Write Request Size (KiB)
avgwrrqsz.type GAUGE
avgwrrqsz.min 0

EOF

    croak "Unknown mode $mode\n" unless exists $config_for{$mode};

    print $config_for{$mode};
    exit 0;
}
310

    
311

    
312
########
313
# MAIN #
314
########
315

    
316

    
317
my %cur_diskstat = fetch_device_counters($device);

# Without counters for the requested device there is nothing to report.
# Fail loudly instead of persisting an empty/bogus state and printing
# values computed from undefined counters.
croak "Found no statistics for device '$device'. Aborting\n"
  if ( not defined $cur_diskstat{'rd_ios'} );

my ( $prev_time, %prev_diskstat ) = restore_state();

# Always persist the current sample so the next run has a baseline
save_state( time(), %cur_diskstat );

# Probably the first run for the given device, we need state to do our job,
# so let's wait for the next run.
exit if ( not defined $prev_time or not %prev_diskstat );

calculate_and_print_values( $prev_time, \%prev_diskstat, \%cur_diskstat );
329

    
330

    
331

    
332
########
333
# SUBS #
334
########
335

    
336
# Derives per-second rates and per-request averages from two counter
# samples and prints the Munin values for the mode selected by the
# file-level $mode ('latency', 'throughput' or 'iops').
#
#   $prev_time  - epoch seconds at which the previous sample was stored
#   $prev_stats - hashref with the previous counters
#   $cur_stats  - hashref with the current counters
#
# Prints nothing when no meaningful difference can be computed (no time
# elapsed, or a counter went backwards). Croaks on an unknown mode.
sub calculate_and_print_values {
    my ( $prev_time, $prev_stats, $cur_stats ) = @_;

    my $bytes_per_sector = 512;

    my $interval = time() - $prev_time;

    # Two runs within the same second (or a clock that stepped backwards)
    # would mean a division by zero or negative rates. Skip this sample.
    return if ( $interval <= 0 );

    my %delta = map { $_ => $cur_stats->{$_} - $prev_stats->{$_} }
      qw(rd_ios wr_ios rd_ticks wr_ticks rd_sectors wr_sectors tot_ticks);

    # A counter that decreased means the counters were reset (e.g. reboot)
    # or wrapped. Averages built from such deltas are garbage - two negative
    # numbers divide to a plausible positive one - so skip this sample too.
    return if ( grep { $_ < 0 } values %delta );

    my $read_ios   = $delta{'rd_ios'};
    my $write_ios  = $delta{'wr_ios'};
    my $rd_ticks   = $delta{'rd_ticks'};
    my $wr_ticks   = $delta{'wr_ticks'};
    my $rd_sectors = $delta{'rd_sectors'};
    my $wr_sectors = $delta{'wr_sectors'};
    my $tot_ticks  = $delta{'tot_ticks'};

    my $read_io_per_sec  = $read_ios / $interval;
    my $write_io_per_sec = $write_ios / $interval;

    my $read_bytes_per_sec  = $rd_sectors / $interval * $bytes_per_sector;
    my $write_bytes_per_sec = $wr_sectors / $interval * $bytes_per_sector;

    my $total_ios         = $read_ios + $write_ios;
    my $total_ios_per_sec = $total_ios / $interval;

    # Utilization - or "how busy is the device"?
    # If the time spent for I/O was close to 1000msec for
    # a given second, the device is nearly 100% saturated.
    my $utilization = $tot_ticks / $interval;

    # Average time an I/O takes on the block device
    my $servicetime =
      $total_ios_per_sec ? $utilization / $total_ios_per_sec : 0;

    # Average wait time for an I/O from start to finish
    # (includes queue times et al). Each average is 0 when no request of
    # that kind happened in the interval.
    my $average_wait = $total_ios ? ( $rd_ticks + $wr_ticks ) / $total_ios : 0;
    my $average_rd_wait = $read_ios  ? $rd_ticks / $read_ios  : 0;
    my $average_wr_wait = $write_ios ? $wr_ticks / $write_ios : 0;

    my $average_rq_size_in_kb =
      $total_ios
      ? ( $rd_sectors + $wr_sectors ) * $bytes_per_sector / 1024 / $total_ios
      : 0;
    my $average_rd_rq_size_in_kb =
      $read_ios ? $rd_sectors * $bytes_per_sector / 1024 / $read_ios : 0;
    my $average_wr_rq_size_in_kb =
      $write_ios ? $wr_sectors * $bytes_per_sector / 1024 / $write_ios : 0;

    # msec busy per second -> percent
    my $util_print = $utilization / 10;

    if ( $mode eq 'latency' ) {
        print <<EOF;

util.value $util_print
svctm.value $servicetime
avgwait.value $average_wait
avgrdwait.value $average_rd_wait
avgwrwait.value $average_wr_wait

EOF
    }
    elsif ( $mode eq 'throughput' ) {

        print <<EOF;

rdbytes.value $read_bytes_per_sec
wrbytes.value $write_bytes_per_sec

EOF
    }
    elsif ( $mode eq 'iops' ) {

        print <<EOF;

rdio.value $read_io_per_sec
wrio.value $write_io_per_sec
avgrqsz.value $average_rq_size_in_kb
avgrdrqsz.value $average_rd_rq_size_in_kb
avgwrrqsz.value $average_wr_rq_size_in_kb

EOF

    }
    else {
        croak "Unknown mode $mode\n";
    }

}
430

    
431
# Reads /proc/diskstats and returns a list of array references, one per
# device line, each holding the first 14 whitespace-separated fields
# (major, minor, name and the 11 classic I/O counters).
# Croaks if the file cannot be opened or closed.
sub read_diskstats {

    open my $stat_fh, '<', '/proc/diskstats'
      or croak "Failed to open '/proc/diskstats': $!\n";

    my @lines;

    while ( my $line = <$stat_fh> ) {

        # Strip trailing newline and leading whitespace
        chomp $line;
        $line =~ s/^\s+//;

        my @elems = split /\s+/, $line;

        # We explicitly don't support old-style diskstats
        # There are situations where only _some_ lines (e.g.
        # partitions on older 2.6 kernels) have fewer stats
        # numbers, therefore we'll skip them silently
        next if ( @elems < 14 );

        # Newer kernels append discard and flush counters after the classic
        # fields. Only the first 14 fields are consumed downstream, so keep
        # those instead of rejecting the longer line.
        push @lines, [ @elems[ 0 .. 13 ] ];
    }

    close $stat_fh or croak "Failed to close '/proc/diskstats': $!";
    return @lines;
}
459

    
460
# Reads block device statistics from /sys/block/<device>/stat.
#
#   $want_device - optional device name; when given only that device is
#                  read, otherwise every /sys/block/*/stat is read.
#
# Returns a list of array references laid out like a /proc/diskstats line:
# (-1, -1, devname, followed by the 11 classic counters).
# Croaks if a stat file cannot be read or holds fewer than 11 values.
sub read_sysfs {

    my ($want_device) = @_;

    my @devices;
    my @lines;

    if ( defined $want_device ) {

        # sysfs uses '!' as replacement for '/', e.g. cciss!c0d0
        $want_device =~ tr#/#!#;
        @devices = $want_device;
    }
    else {
        @devices = map { m!/sys/block/([^/]+)/stat! } glob "/sys/block/*/stat";
    }

    for my $cur_device (@devices) {
        my $stats_file = "/sys/block/$cur_device/stat";

        open my $stat_fh, '<', $stats_file
          or croak "Failed to open '$stats_file': $!\n";

        my $line = <$stat_fh>;

        close $stat_fh or croak "Failed to close '$stats_file': $!\n";

        # An empty file would otherwise surface as uninitialized-value noise
        $line = '' unless defined $line;

        # Trimming whitespace
        $line =~ s/^\s+//;
        chomp $line;

        my @elems = split /\s+/, $line;

        # The file started out with 11 values; later kernels appended
        # discard (15 values) and flush (17 values) counters. Only the first
        # 11 are consumed, so accept any longer line instead of aborting.
        croak "'$stats_file' doesn't contain at least 11 values. Aborting"
          if ( @elems < 11 );

        # Translate the devicename back before storing the information
        ( my $devname = $cur_device ) =~ tr#!#/#;

        # Faking missing diskstats values (major and minor are not known here)
        push @lines, [ -1, -1, $devname, @elems[ 0 .. 10 ] ];
    }

    return @lines;
}
509

    
510

    
511
# Collects the raw statistics and returns them as a hash keyed by device
# name. Each value is a hashref with the keys major, minor, devname,
# rd_ios, rd_merges, rd_sectors, rd_ticks, wr_ios, wr_merges, wr_sectors,
# wr_ticks, ios_in_prog, tot_ticks and rq_ticks.
#
#   $want_device - optional; passed on to read_sysfs() to restrict the
#                  lookup to one device.
#
# sysfs is used when any /sys/block/*/stat file exists, /proc/diskstats
# otherwise.
sub parse_diskstats {

    my ($want_device) = @_;

    # glob must be called in list context here: in scalar context it is an
    # iterator that hands out one name per call and eventually undef, so a
    # repeated call of this sub could flip to the other data source.
    my @sysfs_stat_files = glob "/sys/block/*/stat";

    my @stats =
        @sysfs_stat_files
      ? read_sysfs($want_device)
      : read_diskstats();

    my %diskstats;

    for my $entry (@stats) {

        my %devstat;

        # Map the positional fields onto names; surplus fields are dropped
        @devstat{
            qw(major minor devname
              rd_ios rd_merges rd_sectors rd_ticks
              wr_ios wr_merges wr_sectors wr_ticks
              ios_in_prog tot_ticks rq_ticks)
          }
          = @{$entry};

        $diskstats{ $devstat{'devname'} } = \%devstat;
    }

    return %diskstats;
}
545

    
546
# Returns the counters of one device as a flat key/value list (suitable for
# assignment to a hash).
#
#   $want_device - name of the device as it appears in the statistics
#
# Returns an empty list (undef in scalar context) when the device is not
# among the parsed statistics, so that a hash assignment at the call site
# yields an empty hash rather than a bogus single-element one.
sub fetch_device_counters {

    my ($want_device) = @_;

    my %diskstats = parse_diskstats($want_device);

    # The statistics are keyed by device name, no need to scan them
    return %{ $diskstats{$want_device} }
      if ( exists $diskstats{$want_device} );

    return;
}
560

    
561

    
562
# We use '+' (and formerly '-') as placeholder for '/' in device-names
563
# used as calling name for the script.
564
sub translate_device_name {

    # $device - device name to convert
    # $mode   - 'FROM_FS': invocation-name form -> real device name
    #           'TO_FS'  : real device name -> invocation-name form
    # Returns the converted name; croaks on any other mode.
    my ($device, $mode) = @_;

    if ($mode eq 'FROM_FS') {

        # '-' used to be the separator as well, but it is a legitimate part
        # of device-mapper names (dm-N), so those are left untouched.
        $device =~ tr{-+}{//} unless $device =~ m/dm-\d+/;
        return $device;
    }

    if ($mode eq 'TO_FS') {
        $device =~ tr{/}{+};
        return $device;
    }

    croak "translate_device_name: Unknown mode\n";
}
587

    
588

    
589
# Fallback for systems without Munin::Plugin: defines save_state() and
# restore_state() on top of Storable, persisting to
# /tmp/munin-state-<script name>.
# The code is compiled from a string at run time so that Storable is only
# loaded when the fallback is actually needed.
# Croaks if the replacement code cannot be compiled or loaded.
sub fake_munin_plugin {
    my $eval_code = <<'EOF';

use Storable;
my $storable_filename = basename($0);
$storable_filename = "/tmp/munin-state-$storable_filename";

# NOTE(review): the state file lives under a predictable name in a
# world-writable directory and is read back with Storable::retrieve, which
# must not be fed untrusted data. Consider a private state directory.

sub save_state {
    my @state = @_;

    if ( not -e $storable_filename or -f $storable_filename ) {
        store \@state, $storable_filename or croak "Failed to persist state to '$storable_filename': $!\n";
    }
    else {
        croak "$storable_filename is probably not a regular file. Please delete it.\n";
    }
}

sub restore_state {

    if (-f $storable_filename) {
        my $state = retrieve($storable_filename);
        return @{$state};
    }
    else {
        # Empty list: "no previous state" for the list assignment in the caller
        return;
    }
}
EOF

    eval($eval_code);

    # A failure here (e.g. Storable not installed) used to go unnoticed and
    # only showed up later as an undefined-subroutine error.
    croak "Failed to set up the Munin::Plugin replacement: $@" if $@;
}
621

    
622
# Maps a kernel device-mapper name (dm-N) to a friendlier name relative to
# /dev by locating the node below /dev/mapper with the same major/minor.
#
#   $device - kernel name, must look like "dm-<number>"
#
# Returns "<vg>/<lv>" when the node is recognized as an LVM volume,
# "mapper/<name>" for any other mapping, and the unchanged input when no
# matching node exists. Croaks if the minor number or the device-mapper
# major number cannot be determined.
sub translate_devicemapper_name {
    my ($device) = @_;

    my ($want_minor) = $device =~ m/^dm-(\d+)$/;

    croak "Failed to extract devicemapper id" unless defined($want_minor);

    my $dm_major = find_devicemapper_major();
    croak "Failed to get device-mapper major number\n" unless defined $dm_major;

    for my $entry ( glob '/dev/mapper/*' ) {

        my $rdev = ( stat($entry) )[6];

        # Dangling symlinks and nodes that vanished meanwhile can't be stat'ed
        next unless defined $rdev;

        # Decode the device number: the low minor bits and the major share
        # the lower 20 bits, the remaining minor bits are stored above them.
        # A plain /256 and %256 breaks as soon as the minor exceeds 255.
        my $major = ( $rdev >> 8 ) & 0xfff;
        my $minor = ( $rdev & 0xff ) | ( ( $rdev >> 12 ) & 0xfff00 );

        next unless ( $major == $dm_major && $minor == $want_minor );

        my $pretty_name = translate_lvm_name($entry);
        return $pretty_name if defined $pretty_name;

        # The caller puts "/dev/" in front of the result, so hand back a
        # name relative to /dev rather than the absolute path.
        return 'mapper/' . basename($entry);
    }

    # Return original string if the device can't be found.
    return $device;
}
649

    
650

    
651

    
652
# Tries to turn a /dev/mapper entry into its "<vg>/<lv>" form.
#
#   $entry - path of a node below /dev/mapper
#
# Returns "<vg>/<lv>" if the entry name contains a lone dash and
# /dev/<vg>/<lv> exists, undef otherwise.
sub translate_lvm_name {

    my ($entry) = @_;

    # A dash that has no dash on either side separates vg and lv; dashes
    # that are part of the names themselves appear doubled.
    my $lone_dash = qr/(?<!-)-(?!-)/;

    my $mapper_name = basename($entry);

    # No lone dash, so this can't be a lvm devicemapper device
    return undef if ( $mapper_name !~ $lone_dash );

    my ( $vg, $lv ) = split $lone_dash, $mapper_name, 2;
    return undef unless ( defined($vg) && defined($lv) );

    # Collapse the doubled dashes back into single ones
    s/--/-/g for ( $vg, $lv );

    my $lvm_name = "$vg/$lv";

    # Sanity check - does the constructed device name exist?
    return stat("/dev/$lvm_name") ? "$lvm_name" : undef;
}
679

    
680
# Looks up the major number registered for "device-mapper" in /proc/devices.
# Returns the number, or undef when no such entry is listed.
# Croaks if /proc/devices cannot be opened.
sub find_devicemapper_major {

    open my $devices_fh, '<', '/proc/devices'
      or croak "Failed to open '/proc/devices': $!";

    my $dm_major;

    while ( my $line = <$devices_fh> ) {
        chomp $line;

        # The numbers are right-aligned, so lines may start with blanks.
        # split ' ' skips leading whitespace, whereas /\s+/ would yield an
        # empty first field and shift the number into $name.
        my ( $major, $name ) = split ' ', $line, 2;

        # Section headers and blank lines have no second field worth using
        next unless defined $name;

        if ( $name eq 'device-mapper' ) {
            $dm_major = $major;
            last;
        }
    }
    close $devices_fh;

    return $dm_major;
}