Révision d3bcc2f9
raid2: yet another raid+scrub monitoring plugin
Supports:
- mdstat
- btrfs
- cciss
- megasasctl
| plugins/disk/raid2 | ||
|---|---|---|
| 1 |
#!/usr/bin/python3 -tt |
|
| 2 |
# -*- coding: utf-8 -*- |
|
| 3 |
|
|
| 4 |
"""Munin plugin to monitor software and hardware RAID status and scrub status. |
|
| 5 |
|
|
| 6 |
Copyright 2014 Kim B. Heino, Foobar Oy |
|
| 7 |
License GPLv2+ |
|
| 8 |
|
|
| 9 |
#%# capabilities=autoconf |
|
| 10 |
#%# family=auto |
|
| 11 |
""" |
|
| 12 |
|
|
| 13 |
import glob |
|
| 14 |
import os |
|
| 15 |
import re |
|
| 16 |
import subprocess |
|
| 17 |
import sys |
|
| 18 |
|
|
| 19 |
|
|
| 20 |
def safename(variable):
    """Turn a device or mount name into a name usable as a Munin field.

    The root mount point ``'/'`` is special-cased to ``'btrfs'``. For any
    other input, alphanumeric characters are kept and every other
    character is replaced by an underscore.
    """
    if variable == '/':
        return 'btrfs'
    return ''.join(
        char if char.isalnum() else '_'
        for char in variable
    )
|
| 31 |
|
|
| 32 |
|
|
| 33 |
def run_binary(arg):
    """Execute a command and return its standard output as text.

    ``arg`` is the argument list handed to ``subprocess.Popen`` (no shell
    is involved). Standard error is captured and thrown away, and the
    exit status is not inspected.

    Returns the output decoded as UTF-8 (undecodable bytes are dropped)
    with surrounding whitespace stripped, or ``None`` when starting or
    talking to the process raises ``OSError``.
    """
    try:
        process = subprocess.Popen(
            arg,
            shell=False,
            close_fds=True,
            bufsize=-1,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
        )
        # Only stdout is of interest; stderr is read so the pipe drains.
        raw_stdout = process.communicate()[0]
    except OSError:
        return None
    text = raw_stdout.decode('utf-8', 'ignore')
    return text.strip()
|
|
| 43 |
|
|
| 44 |
|
|
| 45 |
def find_cciss():
    """Collect RAID status from /usr/bin/cciss_vol_status.

    Returns a list of ``(device, status, description)`` tuples where
    ``status`` is 1 when the matching output line contains
    ``' status: OK'`` and 0 when it reports any other status. An empty
    list is returned when no candidate device files exist, the binary is
    missing, or it produced no output.
    """
    statexe = '/usr/bin/cciss_vol_status'

    # Candidate device files: SCSI generic nodes and cciss controllers.
    candidates = glob.glob('/dev/sg*') + glob.glob('/dev/cciss/c*d0')
    devfiles = sorted(candidates)
    if not (devfiles and os.path.exists(statexe)):
        return []

    output = run_binary([statexe] + devfiles)
    if not output:
        return []

    # The i-th output line is attributed to the i-th device file; zip()
    # stops at the shorter of the two sequences.
    # NOTE(review): this assumes the tool prints exactly one line per
    # argument, in argument order -- confirm against real output.
    devices = []
    for device, line in zip(devfiles, output.splitlines()):
        if ' status: ' not in line:
            # Not a status line: nothing to report for this device.
            continue
        status = 1 if ' status: OK' in line else 0
        devices.append(
            (device, status, 'Hardware RAID device {}'.format(device)))
    return devices
|
| 74 |
|
|
| 75 |
|
|
| 76 |
def find_megasasctl():
    """Collect RAID status from /usr/sbin/megasasctl.

    The tool is run with ``-HB`` and any output at all is treated as a
    fault (status 0); empty output means healthy (status 1).
    NOTE(review): ``-HB`` is presumably "print only unhealthy items,
    ignoring the battery" -- confirm against the installed version.

    Returns a single-element list ``[('lsi', status, description)]``, or
    an empty list when the binary is missing or could not be executed.
    """
    statexe = '/usr/sbin/megasasctl'

    # Find binary
    if not os.path.exists(statexe):
        return []

    # Run binary
    data = run_binary([statexe, '-HB'])
    if data is None:
        # The binary could not be started, so nothing is known about the
        # controller. Reporting "OK" here would hide a real problem.
        return []

    status = 0 if data else 1
    return [('lsi', status, 'Hardware RAID device LSI')]
|
|
| 91 |
|
|
| 92 |
|
|
| 93 |
def find_mdstat():
    """Parse /proc/mdstat for software RAID arrays.

    Returns a list of ``(device, status, description)`` tuples, one per
    ``mdN`` array found. ``status`` is 0 when the line following the
    array header contains an underscore and 1 otherwise. An empty list
    is returned when /proc/mdstat cannot be opened.
    """
    # Read file
    try:
        fhn = open('/proc/mdstat')
    except IOError:
        return []
    # The context manager closes the handle even if reading fails.
    with fhn:
        lines = fhn.readlines()

    # Parse it
    devices = []
    device = None
    for line in lines:
        if re.match(r'^md\d+ : ', line):
            # Array header line; its status is on the next line.
            device = line.split()[0]
        elif device:
            # An underscore marks the array as faulty (presumably a
            # missing member in the "[UU_]" map -- confirm).
            status = 0 if '_' in line else 1
            desc = 'Software RAID device {}'.format(device)
            devices.append((device, status, desc))
            device = None
    return devices
|
| 118 |
|
|
| 119 |
|
|
| 120 |
def find_btrfs():
    """Parse /proc/mounts and btrfs scrub status. Ignore csum errors.

    For each distinct btrfs device listed in /proc/mounts (the first
    mount point seen for it), ``/sbin/btrfs scrub status -R`` is run.
    Mounts whose output lacks ``data_extents_scrubbed:`` are skipped.

    Returns a list of ``(mount, status, description)`` tuples where
    ``status`` is 1 only when all checked error counters are reported
    as zero, else 0. An empty list is returned when /proc/mounts cannot
    be opened.
    """
    # Read file
    try:
        fhn = open('/proc/mounts')
    except IOError:
        return []
    # The context manager closes the handle even if reading fails.
    with fhn:
        lines = fhn.readlines()

    def unescape(field):
        """Decode the octal escapes (e.g. ``\\040`` for a space) that
        /proc/mounts uses for whitespace and backslashes in paths."""
        return re.sub(
            r'\\([0-7]{3})',
            lambda match: chr(int(match.group(1), 8)),
            field)

    # Parse it: map each btrfs device to the first mount point seen.
    devmap = {}
    for line in lines:
        fields = line.split()
        if (len(fields) > 2 and fields[2] == 'btrfs'
                and fields[0] not in devmap):
            devmap[fields[0]] = unescape(fields[1])

    # Every one of these must appear in the output for a healthy status.
    clean_markers = (
        'read_errors: 0',
        'verify_errors: 0',
        'super_errors: 0',
        'malloc_errors: 0',
        'uncorrectable_errors: 0',
        'unverified_errors: 0',
    )

    # Iterate devices
    devices = []
    for mount in devmap.values():
        data = run_binary(['/sbin/btrfs', 'scrub', 'status', '-R', mount])
        if not data or 'data_extents_scrubbed:' not in data:
            # No usable scrub report for this mount.
            continue
        desc = 'BTRFS in {}'.format(mount)
        status = 1 if all(marker in data for marker in clean_markers) else 0
        devices.append((mount, status, desc))
    return devices
|
| 156 |
|
|
| 157 |
|
|
| 158 |
def find_devices():
    """Run every detector and return the combined list of device tuples.

    The order is fixed: cciss, megasasctl, mdstat, then btrfs.
    """
    found = []
    for detector in (find_cciss, find_megasasctl, find_mdstat, find_btrfs):
        found.extend(detector())
    return found
|
| 162 |
|
|
| 163 |
|
|
| 164 |
def autoconf():
    """Print "yes" when at least one device is detected, else "no"."""
    if find_devices():
        print('yes')
    else:
        print('no')
|
| 168 |
|
|
| 169 |
|
|
| 170 |
def config(devices):
    """Print the plugin configuration for the given device tuples.

    Emits the fixed graph header followed by a label and a warning
    threshold for each device. When the environment variable
    ``MUNIN_CAP_DIRTYCONFIG`` equals ``'1'``, the values are printed as
    well by calling ``fetch``.
    """
    header = (
        'graph_title RAID and Scrub Status',
        'graph_vlabel Status',
        'graph_category disk',
        'graph_info Health status: 0 = Error, 1 = OK',
        'graph_args --base 1000 --lower-limit 0 --upper-limit 1',
    )
    for line in header:
        print(line)

    for entry in devices:
        field = safename(entry[0])
        print('{}.label {}'.format(field, entry[2]))
        print('{}.warning 1:'.format(field))

    dirtyconfig = os.environ.get('MUNIN_CAP_DIRTYCONFIG')
    if dirtyconfig == '1':
        fetch(devices)
|
| 182 |
|
|
| 183 |
|
|
| 184 |
def fetch(devices):
    """Print one ``<field>.value <status>`` line per device tuple."""
    for entry in devices:
        field = safename(entry[0])
        status = entry[1]
        print('{}.value {}'.format(field, status))
|
|
| 188 |
|
|
| 189 |
|
|
| 190 |
if __name__ == '__main__':
    # The first command-line argument selects the mode; anything other
    # than "autoconf" or "config" (including no argument) prints values.
    mode = sys.argv[1] if len(sys.argv) > 1 else None
    if mode == 'autoconf':
        autoconf()
    elif mode == 'config':
        config(find_devices())
    else:
        fetch(find_devices())
|
Formats disponibles : Unified diff