root / plugins / disk / raid @ 09b88141
Historique | Voir | Annoter | Télécharger (4,47 ko)
#!/usr/bin/perl -w
#
# (c) 2007 Nathan Rutman nathan@clusterfs.com
#
# Plugin to monitor RAID status
#
# Results are % of healthy drives in a raid device
# and % rebuilt of devices that are resyncing.
#
# Usage:
#   raid            print the current values for every active md array
#   raid config     print the Munin graph/field configuration
#   raid autoconf   print "yes" when /proc/mdstat lists md devices
#
#%# family=contrib
#%# capabilities=autoconf

use strict;
use warnings;

# Kernel status file describing all software RAID (md) arrays.
my $MDSTAT = '/proc/mdstat';

# Return true when $MDSTAT is readable and mentions at least one md device.
sub has_raid_devices {
    return 0 unless -r $MDSTAT;
    open(my $fh, '<', $MDSTAT) or return 0;
    my $found = grep { /md/ } <$fh>;
    close($fh);
    return $found ? 1 : 0;
}

# Read $MDSTAT in one go and return its lines.
# The contents may change between reads, so everything is fetched at once.
# Dies when the file cannot be opened.
sub read_mdstat {
    open(my $fh, '<', $MDSTAT) or die "cannot open $MDSTAT: $!\n";
    my @lines = <$fh>;
    close($fh);
    return @lines;
}

# Parse the lines of /proc/mdstat.
#
# Returns a list of hash references, one per active array that carries a
# "[n/m] [UU_]" status line, with these keys:
#   dev     - device name, e.g. "md0"
#   ro      - "(read-only) " / "(auto-read-only) " (trailing blank kept) or ''
#   type    - personality, e.g. "raid1"
#   members - member list as printed by the kernel
#   failed  - number of "F" flags found in the member list
#   nmem    - first number of the "[n/m]" pair
#   nact    - second number of the "[n/m]" pair
#   status  - the "UU_" style status string
#   action  - "idle", the running action, or "action (DELAYED|PENDING|...)"
#   minute  - remaining time such as "123.4min", or ''
#   proc    - progress in percent, or -1 when no percentage is reported;
#             only present while an action is reported
sub parse_mdstat {
    my @text = @_;

    # Interestingly, swap is presented as "active (auto-read-only)"
    # and mdadm has '--readonly' option to make the array 'active (read-only)'
    my $devinfo_re = qr{(md\d+)\s+:\s+active\s+(\(read-only\)\s+|\(auto-read-only\)\s+|)(\w+)\s+(.*)};
    my $devstat_re = qr{.*\[(\d+)\/(\d+)\]\s+\[(\w+)\]};
    my $action_re  = qr{.*(reshape|check|resync|recovery)\s*=\s*(\d+\.\d+%|\w+)(.*finish=(.*min))?};

    my @arrays;
    while (@text) {
        my $line = shift @text;
        next unless defined $line;

        # first line should look like "md0 : active raid1 sda1[0] sdc1[2] sdb1[1]";
        # skip everything until such a line is found
        my ($dev, $ro, $type, $members) = $line =~ $devinfo_re
            or next;

        my %array = (
            dev     => $dev,
            ro      => $ro || '',
            type    => $type,
            members => $members,
            failed  => ($members =~ tr/F//),    # count failed-member flags
        );

        # second line should look like "123456 blocks super 1.2 [2/2] [UU]"
        my $stat_line = shift @text;
        my ($nmem, $nact, $status);
        if (defined $stat_line) {
            ($nmem, $nact, $status) = $stat_line =~ $devstat_re;
        }
        unless (defined $nmem) {
            # No usable status line: give the line back so that a device
            # header in this position is not lost, then skip this array.
            unshift(@text, $stat_line) if defined $stat_line;
            next;
        }
        @array{qw(nmem nact status)} = ($nmem, $nact, $status);

        # third line should look like
        # " [==>......] check = 10.0% (12345/123456) finish=123min speed=12345/sec"
        # and appears only while the array is in action
        my $action_line = shift @text;
        my ($action, $progress, $finish);
        if (defined $action_line and $action_line =~ $action_re) {
            ($action, $progress, $finish) = ($1, $2, $4);
        }

        if (defined $action) {
            $array{minute} = $finish || '';
            if ($progress =~ /(\d+\.\d+)%/) {
                $array{action} = $action;
                $array{proc}   = $1;
            }
            else {
                # 'resync=DELAYED' or 'resync=PENDING'
                $array{action} = "$action ($progress)";
                $array{proc}   = -1;
            }
        }
        else {
            # array is not in action; the line belongs to whatever follows.
            # At end of file there is no line to give back.
            $array{action} = 'idle';
            $array{minute} = '';
            unshift(@text, $action_line) if defined $action_line;
        }

        push @arrays, \%array;
    }

    return @arrays;
}

# Return the Munin "config" lines for one parsed array (see parse_mdstat).
sub device_config_lines {
    my ($array) = @_;
    my $dev   = $array->{dev};
    my $state = "$array->{action} $array->{minute}";
    my $desc  = "$array->{type} $array->{ro}$array->{members}";

    return (
        "$dev.label $dev\n",
        "$dev.info $desc\n",
        # "98:" means critical below 98.
        # Because of an unfound bug, sometimes reported as 99.XX even when OS reports 100.
        "$dev.critical 98:\n",
        "${dev}_rebuild.label $dev reshape/recovery\n",
        "${dev}_rebuild.info $state\n",
        # Because of an unfound bug, sometimes reported as 99.XX even when OS reports 100.
        "${dev}_rebuild.critical 98:\n",
        "${dev}_check.label $dev check/resync \n",
        "${dev}_check.info $state\n",
        "${dev}_failed.label $dev failed disks \n",
        "${dev}_failed.info $state\n",
        "${dev}_failed.critical 0:0\n",
    );
}

# Return the Munin value lines for one parsed array (see parse_mdstat).
sub device_value_lines {
    my ($array) = @_;
    my $dev = $array->{dev};

    # Percentage of healthy drives; an array reporting zero members is
    # shown as 0% instead of dividing by zero.
    my $pct = $array->{nmem} ? 100 * $array->{nact} / $array->{nmem} : 0;
    my $rpct = 100;
    my $cpct = 100;

    if ($array->{action} =~ /reshape|recovery/) {
        # A delayed recovery carries proc == -1 and is reported as such.
        $rpct = $array->{proc};
        $cpct = 0;    # check/resync is not done
    }
    elsif ($array->{action} =~ /check|resync/) {
        if ($array->{proc} < 0) {
            # array is on DELAYED or PENDING, further info is unknown
            $rpct = 0;
            $cpct = 0;
        }
        else {
            # reshape/recovery was done, $rpct => 100
            $cpct = $array->{proc};
        }
    }

    return (
        "$dev.value $pct\n",
        "${dev}_rebuild.value $rpct\n",
        "${dev}_check.value $cpct\n",
        "${dev}_failed.value $array->{failed}\n",
    );
}

# Entry point: dispatch on the first command line argument and return the
# process exit status.
sub main {
    my ($mode) = @_;
    $mode = '' unless defined $mode;

    if ($mode eq 'autoconf') {
        print has_raid_devices() ? "yes\n" : "no (no RAID devices found)\n";
        return 0;
    }

    # Read first, so that a failure does not leave a half-printed config.
    my @arrays = parse_mdstat(read_mdstat());

    if ($mode eq 'config') {
        print "graph_title RAID status\n";
        print "graph_category disk\n";
        print "graph_info This graph monitors RAID disk health. Values are percentage of healthy drives in each raid group. Degraded devices are marked Critical.\n";
        print "graph_args --base 1000 -l 0\n";
        print "graph_vlabel % healthy/rebuilt\n";
        print "graph_scale no\n";
        print device_config_lines($_) for @arrays;
    }
    else {
        print device_value_lines($_) for @arrays;
    }

    return 0;
}

# Run only when executed as a script, so the helpers can be loaded and
# exercised on their own.
unless (caller) {
    exit main(@ARGV);
}

1;
