root / plugins / disk / raid @ 9154029b
Historique | Voir | Annoter | Télécharger (2,21 ko)
| 1 |
#!/usr/bin/perl -w |
|---|---|
| 2 |
# |
| 3 |
# (c) 2007 Nathan Rutman nathan@clusterfs.com |
| 4 |
# |
| 5 |
# Plugin to monitor RAID status |
| 6 |
# |
| 7 |
# Results are % of healthy drives in a raid device |
| 8 |
# and % rebuilt of devices that are resyncing. |
| 9 |
# |
| 10 |
#%# family=contrib |
| 11 |
#%# capabilities=autoconf |
| 12 |
|
| 13 |
# Munin "autoconf" mode: report whether this plugin is usable on this host.
#
# Prints "yes" when /proc/mdstat can be read and lists at least one
# "md<N> :" entry (the same shape the value parser below looks for),
# otherwise "no (reason)".  Munin's autoconf protocol expects the reason in
# parentheses and an exit status of 0 for both answers, so the plugin always
# exits 0 here.
if ( $ARGV[0] and $ARGV[0] eq "autoconf" ) {
    my $has_md = 0;

    # Scan the file in Perl instead of shelling out to grep.
    if ( open( my $mdstat, '<', "/proc/mdstat" ) ) {
        while ( my $line = <$mdstat> ) {
            if ( $line =~ /md\d+\s+:/ ) {
                $has_md = 1;
                last;
            }
        }
        close $mdstat;
    }

    if ($has_md) {
        print "yes\n";
    }
    else {
        print "no (no RAID devices)\n";
    }
    exit 0;
}
| 22 |
|
| 23 |
# Munin "config" mode, part 1: emit the graph-wide attributes.
# The per-device field definitions are printed later, while /proc/mdstat
# is being parsed.
my $mode = $ARGV[0];
if ( $mode and $mode eq "config" ) {
    my @graph_attributes = (
        'graph_title RAID status',
        'graph_category disk',
        'graph_info This graph monitors RAID disk health. Values are percentage of healthy drives in each raid group. Degraded devices are marked Critical.',
        'graph_args --base 1000 -l 0',
        'graph_vlabel % healthy/rebuilt',
        'graph_scale no',
    );
    for my $attribute (@graph_attributes) {
        print $attribute, "\n";
    }
}
| 31 |
|
| 32 |
# Parse /proc/mdstat and, for every active md device that reports a
# "[total/active]" member count, print either its field definitions
# ("config" mode) or its current values (any other mode).
#
# Two fields are produced per device:
#   <dev>          percentage of member drives that are active
#   <dev>_rebuild  rebuild progress as reported by "mdadm -D"; 100 when the
#                  array is fully populated, 0 when it is degraded and no
#                  rebuild progress can be read
#
# Exits 1 if /proc/mdstat cannot be opened, 0 otherwise.
{
    my $config_mode = ( $ARGV[0] and $ARGV[0] eq "config" );

    # Slurp the whole file: a device entry spans two consecutive lines.
    open( my $mdstat, '<', "/proc/mdstat" ) or exit 1;
    #open( my $mdstat, '<', "/etc/munin/plugins/sample.failed" ) or exit 1;
    my $text = do { local $/; <$mdstat> };
    close $mdstat;
    $text = '' unless defined $text;

    # Should look like "active raid1 sda1[0] sdc1[2] sdb1[1]"
    # Interestingly, swap is presented as "active (auto-read-only)"
    my $md_entry = qr{
        (md\d+) \s+ : \s+ active \s+      # device name
        (?: \(auto-read-only\) \s+ )?     # optional read-only marker
        (\w+) \s+ (.*) \n                 # raid type, member list
        .* \[ (\d+) / (\d+) \]            # [total members / active members]
        \s+ \[ \w+ \]                     # per-member status, e.g. [UU_]
    }x;

    # /g in scalar context resumes after the previous match, so each pass
    # picks up the next device entry.
    while ( $text =~ /$md_entry/g ) {
        my ( $dev, $type, $members, $nmem, $nact ) = ( $1, $2, $3, $4, $5 );

        if ($config_mode) {
            print "$dev.label $dev\n";
            print "$dev.info $type $members\n";

            # "98:" sets the lower bound of the OK range, i.e. anything
            # below 98 is critical.  98 rather than 100 because of an
            # unfound bug: the value is sometimes reported as 99.XX even
            # when the OS reports 100.
            print "$dev.critical 98:\n";
            print $dev, "_rebuild.label $dev rebuilt\n";
            print $dev, "_rebuild.info $type\n";
            print $dev, "_rebuild.critical 98:\n";
        }
        else {
            # Guard against a zero member count instead of dying on a
            # division by zero.
            my $pct  = $nmem ? 100 * $nact / $nmem : 0;
            my $rpct = 100;

            if ( $pct < 100 ) {
                # Degraded: ask mdadm for the rebuild progress.  Stays 0
                # when mdadm cannot be run or prints no "Rebuild" line
                # (degraded but not currently rebuilding).
                $rpct = 0;

                # List-form pipe open: no shell, no external grep.
                if ( open( my $mdadm, '-|', '/sbin/mdadm', '-D', "/dev/$dev" ) ) {
                    while ( my $line = <$mdadm> ) {
                        next unless $line =~ /Rebuild/;
                        $rpct = $1 if $line =~ /([0-9]+)% complete/;
                        last;    # only the first "Rebuild" line counts
                    }
                    close $mdadm;
                }
            }

            print "$dev.value $pct\n";
            print $dev, "_rebuild.value $rpct\n";
        }
    }
}

exit 0;
| 73 |
|
