root / plugins / disk / smart_ @ c96bafa1
Historique | Voir | Annoter | Télécharger (21,6 ko)
| 1 | c1985259 | Nicolas Stransky | #!/usr/bin/env python |
|---|---|---|---|
| 2 | # -*- encoding: iso-8859-1 -*- |
||
| 3 | # |
||
| 4 | # Wildcard-plugin to monitor S.M.A.R.T attribute values through smartctl, |
||
| 5 | # which is part of smartmontools package: |
||
| 6 | # http://smartmontools.sourceforge.net/ |
||
| 7 | # |
||
| 8 | # To monitor a S.M.A.R.T device, link smart_<device> to this file. |
||
| 9 | # E.g. |
||
| 10 | # ln -s /usr/share/munin/plugins/smart_ /etc/munin/plugins/smart_hda |
||
| 11 | # ...will monitor /dev/hda. |
||
| 12 | # |
||
| 13 | # Needs following minimal configuration in plugin-conf.d/munin-node: |
||
| 14 | # [smart_*] |
||
| 15 | # user root |
||
| 16 | # group disk |
||
| 17 | # |
||
| 18 | # Parameters |
||
| 19 | # smartpath - Specify path to smartctl program (Default: /usr/sbin/smartctl) |
||
| 20 | # smartargs - Override '-a' argument passed to smartctl with '-A -i'+smartargs |
||
| 21 | # ignorestandby - Ignore the standby state of the drive and perform SMART query. Default: False |
||
| 22 | # |
||
| 23 | # Parameters can be specified on a per-drive basis, eg: |
||
| 24 | # [smart_hda] |
||
| 25 | # user root |
||
| 26 | # group disk |
||
| 27 | # env.smartargs -H -c -l error -l selftest -l selective -d ata |
||
| 28 | # env.smartpath /usr/local/sbin/smartctl |
||
| 29 | # |
||
| 30 | # [smart_twa0-1] |
||
| 31 | # user root |
||
| 32 | # group disk |
||
| 33 | # env.smartargs -H -l error -d 3ware,1 |
||
| 34 | # env.ignorestandby True |
||
| 35 | # |
||
| 36 | # [smart_twa0-2] |
||
| 37 | # user root |
||
| 38 | # group disk |
||
| 39 | # env.smartargs -H -l error -d 3ware,2 |
||
| 40 | # |
||
| 41 | # Author: Nicolas Stransky <Nico@neo-lan.net> |
||
| 42 | # |
||
| 43 | # v1.0 22/08/2004 - First draft |
||
| 44 | # v1.2 28/08/2004 - Clean up the code, add a verbose option |
||
| 45 | # v1.3 14/11/2004 - Compatibility with python<2.2. See comments in the code |
||
| 46 | # v1.4 17/11/2004 - Deal with non zero exit codes of smartctl |
||
| 47 | # - config now prints the critical thresholds, as reported by smartctl |
||
| 48 | # v1.5 18/11/2004 - Plot smartctl_exit_code bitmask |
||
| 49 | # v1.6 21/11/2004 - Add autoconf and suggest capabilities |
||
| 50 | # - smartctl path can be passed through "smartpath" environment variable |
||
| 51 | # - Additional smartctl args can be passed through "smartargs" environment variable |
||
| 52 | # v1.7 29/11/2004 - Add suggest capabilities for NetBSD, OpenBSD, FreeBSD and SunOS. |
||
| 53 | # - Allow to override completely the smartctl arguments with "smartargs" |
||
| 54 | # v1.8 16/02/2005 - Exit status field now only triggers warnings, not criticals. |
||
| 55 | # v1.9 07/07/2005 - Allow to query several drives on the same 3ware card. |
||
| 56 | # - Correct a bug when '-i' was not listed in smartargs |
||
| 57 | # - Don't fail if no value was obtained for hard drive model |
||
| 58 | # v1.10 19/08/2005 - smartctl_exit_code is now a numerical value |
||
| 59 | # v2.0 08/05/2009 - Correct bug in the interpretation of smartctl_exit_code |
||
| 60 | # - New option to suppress SMART warnings in munin |
||
| 61 | # - Temporary lack of output for previously existing drive now reports U |
||
| 62 | # - The plugin now contains its own documentation for use with munindoc |
||
| 63 | # - Removed python<2.2 compatibility comments |
||
| 64 | # - Better autodetection of drives |
||
| 65 | # - Don't spin up devices in a low-power mode. |
||
| 66 | # |
||
| 67 | # Copyright (c) 2004-2009 Nicolas Stransky. |
||
| 68 | # |
||
| 69 | # Permission to use, copy, and modify this software with or without fee |
||
| 70 | # is hereby granted, provided that this entire notice is included in |
||
| 71 | # all source code copies of any software which is or includes a copy or |
||
| 72 | # modification of this software. |
||
| 73 | # |
||
| 74 | # THIS SOFTWARE IS BEING PROVIDED "AS IS", WITHOUT ANY EXPRESS OR |
||
| 75 | # IMPLIED WARRANTY. IN PARTICULAR, NONE OF THE AUTHORS MAKES ANY |
||
| 76 | # REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE |
||
| 77 | # MERCHANTABILITY OF THIS SOFTWARE OR ITS FITNESS FOR ANY PARTICULAR |
||
| 78 | # PURPOSE. |
||
| 79 | # |
||
| 80 | # |
||
| 81 | # Magic markers |
||
| 82 | #%# capabilities=autoconf suggest |
||
| 83 | #%# family=auto |
||
| 84 | |||
## User-tunable settings: the three variables below may be edited.
# Set to True to have diagnostic messages written to stderr.
verbose = False
# Set to False to leave the SMART warning/critical thresholds out of the
# munin configuration output.
report_warnings = True
# Directory holding the per-drive state files; adapt to your installation.
statefiledir = '/var/lib/munin/plugin-state/'
# Nothing below this line should need to be modified.
||
| 93 | |||
| 94 | import os, sys, string, pickle |
||
| 95 | from math import log |
||
| 96 | plugin_version="2.0" |
||
| 97 | |||
def verboselog(s):
    """Write the diagnostic message *s* to stderr, prefixed with the plugin name."""
    global plugin_name
    sys.stderr.write(''.join((plugin_name, ': ', s, '\n')))


# When verbosity is switched off, swap the logger for a function that
# silently discards its argument.
if not verbose:
    def verboselog(s):
        return None
||
| 104 | |||
def read_values(hard_drive):
    """Query smartctl for /dev/<hard_drive> and fill the global ``smart_values``.

    The command line is built from the environment:

    * ``smartpath``     - smartctl binary (default ``/usr/sbin/smartctl``)
    * ``smartargs``     - arguments placed before ``-A -i`` (default ``-a``)
    * ``ignorestandby`` - when set to a true value, ``-n standby`` is NOT
      passed.  Empty, ``0``, ``false``, ``no`` and ``off`` (any case) count
      as false; every other value counts as true.

    For each row of the attribute table (the lines following the
    ``ID# ATTRIBUTE_NAME`` header up to the next blank line) an entry
    ``{"value": ..., "threshold": ...}`` is stored under the attribute name
    with ``-`` replaced by ``_``.  Two extra keys are always added:
    ``smartctl_exit_status`` and ``model``.

    Side effects: clears and refills the global ``smart_values`` and sets the
    global ``emptyoutput`` to True when no attribute row was found.

    Returns the raw status returned by closing the smartctl pipe (``None``
    when smartctl exited with code 0).  Terminates the process with exit
    code 1 when smartctl cannot be run or its output cannot be processed.
    """
    global smart_values, emptyoutput
    # Start from an empty table: this function is called once per candidate
    # drive during "suggest", and leftovers from a previous drive would make
    # the emptiness check below meaningless.
    smart_values.clear()
    model = None
    try:
        verboselog('Reading S.M.A.R.T values')
        os.putenv('LC_ALL', 'C')
        # Environment values are strings, so "False" must be parsed rather
        # than tested for truthiness.
        standby_setting = os.getenv('ignorestandby', '').strip().lower()
        ignore_standby = standby_setting not in ('', '0', 'false', 'no', 'off')
        standby_arg = ' ' if ignore_standby else ' -n standby '
        # NOTE: the command is interpreted by a shell; smartpath, smartargs
        # and the device name must come from trusted configuration.
        command = (os.getenv('smartpath', '/usr/sbin/smartctl') + ' '
                   + os.getenv('smartargs', '-a') + standby_arg
                   + '-A -i /dev/' + hard_drive)
        smart_output = os.popen(command)
        in_attribute_table = False
        for line in smart_output:
            if line[:-1] == '':
                # A blank line terminates the attribute table.
                in_attribute_table = False
            elif line[:13] == 'Device Model:' or line[:7] == 'Device:':
                # Keep everything after the first colon so that model names
                # containing ':' are not truncated.
                model_words = line.split(':', 1)[1].split()
                if 'Version' in model_words:
                    model_words.remove('Version')
                model = ' '.join(model_words)
            if in_attribute_table:
                fields = line.split()
                if len(fields) < 6:
                    verboselog('Skipping malformed attribute line: ' + line.strip())
                    continue
                smart_values[fields[1].replace('-', '_')] = {
                    "value": fields[3],
                    "threshold": fields[5],
                }
            elif line[:18] == "ID# ATTRIBUTE_NAME":
                # Start reading the Attributes block
                in_attribute_table = True
        exit_status = smart_output.close()
        if exit_status is not None:
            # smartctl exit code is a bitmask, check man page.
            # The pipe status carries the exit code in its high byte.
            num_exit_status = int(exit_status) >> 8
            if num_exit_status <= 0:
                # No exit code available (e.g. smartctl was killed by a
                # signal): there is no bitmask to interpret.
                verboselog('smartctl terminated abnormally (status ' + str(exit_status) + ')')
                sys.exit(1)
            # Index of the highest set bit, computed with integer arithmetic
            # to avoid floating point rounding in log().
            highest_bit = 0
            while num_exit_status >> (highest_bit + 1):
                highest_bit += 1
            if highest_bit <= 2:
                verboselog('smartctl cannot access S.M.A.R.T values on drive ' + hard_drive
                           + '. Command exited with code ' + str(num_exit_status)
                           + ' (bit ' + str(highest_bit) + ')')
            else:
                verboselog('smartctl exited with code ' + str(num_exit_status)
                           + ' (bit ' + str(highest_bit) + '). ' + hard_drive
                           + ' may be FAILING RIGHT NOW!')
        else:
            num_exit_status = 0
    except Exception:
        verboselog('Cannot access S.M.A.R.T values! Check user rights or propper smartmontools installation/arguments.')
        sys.exit(1)
    if not smart_values:
        verboselog('Can\'t find any S.M.A.R.T values in smartctl output!')
        emptyoutput = True
    else:
        emptyoutput = False
    smart_values["smartctl_exit_status"] = {"value": str(num_exit_status), "threshold": "1"}
    # For some reason we may have no value for "model"
    smart_values["model"] = model if model is not None else "unknown"
    return exit_status
||
| 149 | |||
def open_state_file(hard_drive, mode):
    """Open the state file belonging to *hard_drive* and return the file object.

    *hard_drive* is the sequence of name parts; the parts are joined with
    ``-`` to form ``<statefiledir>/smart-<parts>.state``.  *mode* is an
    ``open()`` mode such as ``"r"`` or ``"w"``.  The state file holds pickled
    data, so binary mode is enforced: a ``b`` is appended when the caller
    did not supply one.

    Raises whatever ``open()`` raises when the file cannot be opened.
    """
    global statefiledir
    if 'b' not in mode:
        mode += 'b'
    return open(statefiledir + '/smart-' + '-'.join(hard_drive) + '.state', mode)
||
| 153 | |||
def update_state_file(hard_drive):
    """Pickle the global ``smart_values`` into the state file of *hard_drive*.

    This is best effort: a failure is reported through ``verboselog`` and
    otherwise ignored, so that the plugin can still print its values.
    """
    try:
        verboselog('Saving statefile')
        # The context manager guarantees the data is flushed and the
        # descriptor released even when pickling fails.
        with open_state_file(hard_drive, "wb") as state_file:
            pickle.dump(smart_values, state_file)
    except Exception:
        verboselog('Error trying to save state file! Check access rights')
||
| 160 | |||
def print_plugin_values(hard_drive):
    """Print one ``<attribute>.value <value>`` line per collected attribute.

    When the last smartctl run produced no attributes (``emptyoutput`` is
    set), the work is delegated to ``print_unknown_from_statefile``.
    """
    global emptyoutput, smart_values
    if emptyoutput:
        print_unknown_from_statefile(hard_drive, smart_values)
        return
    verboselog('Printing S.M.A.R.T values')
    for attribute, entry in smart_values.items():
        # "model" holds the drive model string, not a plottable attribute.
        if attribute != "model":
            print(attribute + ".value " + entry["value"])
||
| 170 | |||
def print_config(hard_drive):
    """Print the munin graph configuration for *hard_drive*.

    The attribute list is taken from the drive's state file when it exists.
    Otherwise the drive is queried through ``read_values`` and the result is
    written to a new state file.  Thresholds are only printed when the
    global ``report_warnings`` is true.

    Terminates the process with exit code 1 when an existing state file
    cannot be loaded.
    """
    global report_warnings, smart_values, statefiledir
    drive_label = ','.join(hard_drive)
    state_path = statefiledir + '/smart-' + '-'.join(hard_drive) + '.state'
    if os.path.exists(state_path):
        try:
            verboselog('Try to recall previous S.M.A.R.T attributes for ' + drive_label)
            # NOTE(review): pickle.load executes whatever the file encodes;
            # the state directory must only be writable by trusted users.
            with open_state_file(hard_drive, "rb") as state_file:
                smart_values_state = pickle.load(state_file)
        except Exception:
            verboselog('Error opening existing state file!')
            sys.exit(1)
    else:
        verboselog('No state file, reading S.M.A.R.T values for the first time')
        read_values(hard_drive[0])
        with open_state_file(hard_drive, "wb") as state_file:
            pickle.dump(smart_values, state_file)
        smart_values_state = smart_values

    verboselog('Printing configuration')
    print('graph_title S.M.A.R.T values for drive ' + drive_label)
    print('graph_vlabel Attribute S.M.A.R.T value')
    print('graph_args --base 1000 --lower-limit 0')
    print('graph_category disk')
    print('graph_info This graph shows the value of all S.M.A.R.T attributes of drive ' + drive_label + ' (' + smart_values_state['model'] + '). smartctl_exit_status is the return value of smartctl. A non-zero return value indicates an error, a potential error, or a fault on the drive.')
    # sorted() works on both list and view results of dict iteration.
    for key in sorted(smart_values_state):
        # The exit status is configured separately below; "model" is not a
        # data field.
        if key in ('smartctl_exit_status', 'model'):
            continue
        print(key + '.label ' + key)
        print(key + '.draw LINE2')
        if report_warnings:
            print(key + '.critical ' + smart_values_state[key]["threshold"] + ':')
    print('smartctl_exit_status.label smartctl_exit_status')
    print('smartctl_exit_status.draw LINE2')
    if report_warnings:
        print('smartctl_exit_status.warning ' + smart_values_state['smartctl_exit_status']["threshold"])
||
| 202 | |||
def print_unknown_from_statefile(hard_drive, smart_values):
    """Print ``<attribute>.value U`` for every attribute in the state file.

    Used when smartctl returned no attributes for a drive that was seen
    before, so that munin records "unknown" instead of losing the fields.
    The *smart_values* parameter is accepted for interface compatibility
    and is not used.

    Terminates the process with exit code 1 when the state file is missing
    or cannot be loaded.
    """
    global statefiledir
    state_path = statefiledir + '/smart-' + '-'.join(hard_drive) + '.state'
    if not os.path.exists(state_path):
        # Without a state file the attribute names are not known.
        verboselog('No state file, cannot report unknown values')
        sys.exit(1)
    try:
        verboselog('Failed to get S.M.A.R.T values from drive. Try to recall previous S.M.A.R.T attributes for ' + ','.join(hard_drive))
        # NOTE(review): pickle.load executes whatever the file encodes; the
        # state directory must only be writable by trusted users.
        with open_state_file(hard_drive, "rb") as state_file:
            smart_values_state = pickle.load(state_file)
    except Exception:
        verboselog('Error opening existing state file!')
        sys.exit(1)

    verboselog('Printing unknown values for all attributes in state file')
    for key in sorted(smart_values_state):
        # "model" is the drive model string, not a data field.
        if key == 'model':
            continue
        print(key + '.value U')
||
| 222 | |||
def get_hard_drive_name():
    """Derive the monitored device from the plugin's (symlink) name.

    The device is the part of the global ``plugin_name`` after the last
    ``_``.  On SunOS a leading ``rdsk`` or ``rmt`` is turned back into a
    sub-directory of /dev.  A name containing ``-`` is split at the last
    ``-`` into device and unit number (several plugins for the same 3ware
    device).

    Returns a list: ``[device]`` or ``[device, unit]``.
    Terminates the process with exit code 1 when no device name can be
    extracted or when /dev/<device> does not exist.
    """
    global plugin_name
    try:
        device = plugin_name[plugin_name.rindex('_') + 1:]
    except ValueError:
        # No '_' in the plugin name at all.
        device = ''
    if not device:
        verboselog('No S.M.A.R.T device name found in plugin\'s symlink!')
        sys.exit(1)
    name = [device]
    if os.uname()[0] == "SunOS":
        # if hard_drive name starts with "rdsk" or "rmt", try to reconstruct the path
        if device[0:4] == "rdsk":
            name[0] = os.path.join("rdsk", device[4:])
        elif device[0:3] == "rmt":
            name[0] = os.path.join("rmt", device[3:])
    # For 3ware cards, we have to set multiple plugins for the same hard drive name.
    # Let's see if we find a '-' in the drive name.
    if '-' in name[0]:
        # Put the drive name and its number in a list
        split_at = name[0].rindex('-')
        name = [name[0][:split_at], name[0][split_at + 1:]]
    # Check that the drive exists in /dev
    if not os.path.exists('/dev/' + name[0]):
        verboselog('/dev/' + name[0] + ' not found!')
        sys.exit(1)
    return name
||
| 249 | |||
def _probe_drive(device, drives):
    """Append *device* to *drives* when smartctl returns usable data for it."""
    verboselog('Trying ' + device + '...')
    try:
        exit_status = read_values(device)
    except (Exception, SystemExit):
        # read_values() ends with sys.exit(1) when smartctl cannot be run;
        # during detection that only means "not a usable drive".
        return
    # A drive qualifies when smartctl succeeded, or when the highest bit set
    # in its exit code is above bit 2 (exit code >= 8), and attributes were
    # actually found.  The exit code sits in the high byte of the status.
    if (exit_status is None or (int(exit_status) >> 8) >= 8) and not emptyoutput:
        drives.append(device)


def _first_output_line(command):
    """Run shell *command* and return its first output line, stripped."""
    pipe = os.popen(command)
    try:
        return pipe.readline().strip()
    finally:
        pipe.close()


def find_smart_drives():
    """Return the list of device names that answer S.M.A.R.T queries.

    Candidates are enumerated per operating system (Linux, OpenBSD,
    FreeBSD, NetBSD, SunOS) and each one is probed with ``read_values``.
    Detection is best effort: an enumeration failure is logged through
    ``verboselog`` and yields the drives found so far.
    """
    # Try to autodetect Linux, *BSD, SunOS drives. Don't try to autodetect drives on a 3Ware card.
    drives = []
    system = os.uname()[0]
    if system == "Linux":
        if os.path.exists('/sys/block/'):
            # Running 2.6
            try:
                for drive in os.listdir('/sys/block/'):
                    # Ignore MD, Floppy, loop , RAM and LVM devices.
                    if drive[:2] in ('md', 'fd', 'lo', 'ra', 'dm'):
                        continue
                    _probe_drive(drive, drives)
            except Exception:
                verboselog('Failed to list devices in /sys/block')
        else:
            verboselog('Not running linux2.6, failing back to /proc/partitions')
            try:
                with open('/proc/partitions', 'r') as partitions:
                    partition_lines = partitions.readlines()
                for line in partition_lines:
                    words = line.split()
                    # Skip the header and blank lines.
                    if len(words) == 0 or words[0][0] not in string.digits:
                        continue
                    # Ignore RAM, md, LVM and LVM2 devices
                    if words[0] in ('1', '9', '58', '254'):
                        continue
                    # Names ending in a digit are skipped; the others are probed.
                    if words[-1][-1] not in string.digits:
                        _probe_drive(words[-1], drives)
                verboselog('Found drives in /proc/partitions ! ' + str(drives))
            except Exception:
                verboselog('Failed to list devices in /proc/partitions')
    elif system == "OpenBSD":
        try:
            kerndisks = _first_output_line('sysctl hw.disknames')
            for drive in kerndisks[kerndisks.rindex('=') + 1:].split(','):
                # Ignore Memory Disks, CD-ROM drives and Floppy
                if drive[:2] in ('md', 'cd', 'fd'):
                    continue
                _probe_drive(drive + 'c', drives)
        except Exception:
            verboselog('Failed to list OpenBSD disks')
    elif system == "FreeBSD":
        try:
            kerndisks = _first_output_line('sysctl kern.disks')
            for drive in kerndisks.split()[1:]:
                # Ignore Memory Disks, CD-ROM drives and Floppy
                if drive[:2] in ('md', 'cd', 'fd'):
                    continue
                _probe_drive(drive, drives)
        except Exception:
            verboselog('Failed to list FreeBSD disks')
    elif system == "NetBSD":
        try:
            kerndisks = _first_output_line('sysctl hw.disknames')
            for drive in kerndisks.split()[2:]:
                # Ignore Memory Disks, CD-ROM drives and Floppy
                if drive[:2] in ('md', 'cd', 'fd'):
                    continue
                _probe_drive(drive + 'c', drives)
        except Exception:
            verboselog('Failed to list NetBSD disks')
    elif system == "SunOS":
        try:
            from glob import glob
            for drivepath in glob('/dev/rdsk/*s2'):
                _probe_drive('rdsk' + os.path.basename(drivepath), drives)
            for drivepath in glob('/dev/rmt/*'):
                _probe_drive('rmt' + os.path.basename(drivepath), drives)
        except Exception:
            verboselog('Failed to list SunOS disks')
    return drives
||
| 358 | |||
### Main part ###

# Module-level state shared by the functions of this plugin.
smart_values = {}
emptyoutput = False
# The plugin is invoked through a symlink; its base name carries the device.
plugin_name = os.path.basename(sys.argv[0])
verboselog('plugins\' UID: ' + str(os.geteuid()) + ' / plugins\' GID: ' + str(os.getegid()))

# Parse arguments
if len(sys.argv) > 1:
    if sys.argv[1] == "config":
        hard_drive = get_hard_drive_name()
        print_config(hard_drive)
        sys.exit(0)
    elif sys.argv[1] == "autoconf":
        if os.path.exists(os.getenv('smartpath', '/usr/sbin/smartctl')):
            print('yes')
            sys.exit(0)
        else:
            print('no (smartmontools not found)')
            sys.exit(1)
    elif sys.argv[1] == "suggest":
        for drive in find_smart_drives():
            print(drive)
        sys.exit(0)
    elif sys.argv[1] == "version":
        print('smart_ Munin plugin, version ' + plugin_version)
        sys.exit(0)
    elif sys.argv[1] != "":
        verboselog('unknown argument "' + sys.argv[1] + '"')
        sys.exit(1)

# No argument given, doing the real job:
hard_drive = get_hard_drive_name()
read_values(hard_drive[0])
# Only remember the attribute set when the drive actually reported one.
if not emptyoutput:
    update_state_file(hard_drive)
print_plugin_values(hard_drive)
# sys.exit() is used rather than the exit() helper, which is only installed
# by the site module.
sys.exit(0)
||
| 396 | |||
| 397 | |||
| 398 | ### The following is the smart_ plugin documentation, intended to be used with munindoc |
||
| 399 | """ |
||
| 400 | =head1 NAME |
||
| 401 | |||
| 402 | smart_ - Munin wildcard-plugin to monitor S.M.A.R.T. attribute values through smartctl |
||
| 403 | |||
| 404 | =head1 APPLICABLE SYSTEMS |
||
| 405 | |||
| 406 | Node with B<Python> interpreter and B<smartmontools> (http://smartmontools.sourceforge.net/) |
||
| 407 | installed and operational. |
||
| 408 | |||
| 409 | =head1 CONFIGURATION |
||
| 410 | |||
| 411 | =head2 Create link in service directory |
||
| 412 | |||
| 413 | To monitor a S.M.A.R.T device, create a link in the service directory |
||
| 414 | of the munin-node named smart_<device>, which is pointing to this file. |
||
| 415 | |||
| 416 | E.g. |
||
| 417 | |||
| 418 | ln -s /usr/share/munin/plugins/smart_ /etc/munin/plugins/smart_hda |
||
| 419 | |||
| 420 | ...will monitor /dev/hda. |
||
| 421 | |||
| 422 | =head2 Grant privileges in munin-node |
||
| 423 | |||
| 424 | The plugin must be run under high privileged user B<root>, to get access to the raw device. |
||
| 425 | |||
| 426 | So following minimal configuration in plugin-conf.d/munin-node is needed. |
||
| 427 | |||
| 428 | =over 2 |
||
| 429 | |||
| 430 | [smart_*] |
||
| 431 | user root |
||
| 432 | group disk |
||
| 433 | |||
| 434 | =back |
||
| 435 | |||
| 436 | =head2 Set Parameter if needed |
||
| 437 | |||
| 438 | smartpath - Specify path to smartctl program (Default: /usr/sbin/smartctl) |
||
| 439 | smartargs - Override '-a' argument passed to smartctl with '-A -i'+smartargs |
||
| 440 | ignorestandby - Ignore the standby state of the drive and perform SMART query. Default: False |
||
| 441 | |||
| 442 | Parameters can be specified on a per-drive basis, eg: |
||
| 443 | |||
| 444 | =over 2 |
||
| 445 | |||
| 446 | [smart_hda] |
||
| 447 | user root |
||
| 448 | env.smartargs -H -c -l error -l selftest -l selective -d ata |
||
| 449 | env.smartpath /usr/local/sbin/smartctl |
||
| 450 | |||
| 451 | =back |
||
| 452 | |||
| 453 | In particular, for SATA drives, with older versions of smartctl: |
||
| 454 | |||
| 455 | =over 2 |
||
| 456 | |||
| 457 | [smart_sda] |
||
| 458 | user root |
||
| 459 | env.smartargs -d ata -a |
||
| 460 | |||
| 461 | [smart_twa0-1] |
||
| 462 | user root |
||
| 463 | env.smartargs -H -l error -d 3ware,1 |
||
| 464 | env.ignorestandby True |
||
| 465 | |||
| 466 | [smart_twa0-2] |
||
| 467 | user root |
||
| 468 | env.smartargs -H -l error -d 3ware,2 |
||
| 469 | |||
| 470 | =back |
||
| 471 | |||
| 472 | =head1 INTERPRETATION |
||
| 473 | |||
| 474 | If a device supports the B<Self-Monitoring, Analysis |
||
| 475 | and Reporting Technology (S.M.A.R.T.)> it offers readable |
||
| 476 | access to the attribute table. There you find the B<raw value>, |
||
| 477 | a B<normalised value> and a B<threshold> (set by the vendor) |
||
| 478 | for each attribute, that is supported by that device. |
||
| 479 | |||
| 480 | The meaning and handling of the raw value is a secret of the |
||
| 481 | vendors embedded S.M.A.R.T.-Software on the disk. The only |
||
| 482 | relevant info from our external view is the B<normalised value> |
||
| 483 | in comparison with the B<threshold>. If the attributes value is |
||
| 484 | equal or below the threshold, it signals its failure and |
||
| 485 | the B<health status> of the device will switch from B<passed> to B<failed>. |
||
| 486 | |||
| 487 | This plugin fetches the B<normalised values of all SMART-Attributes> |
||
| 488 | and draws a curve for each of them. |
||
| 489 | It takes the vendors threshold as critical limit for the munin datafield. |
||
| 490 | So you will see an alarm, if the value reaches the vendors threshold. |
||
| 491 | |||
| 492 | Looking at the graph: It is a bad sign, if the curve starts |
||
| 493 | to curl or to meander. The more horizontal it runs, |
||
| 494 | the better. Of course it is normal, that the temperatures |
||
| 495 | curve swings a bit. But the others should stay steady on |
||
| 496 | their level if everything is ok. |
||
| 497 | |||
| 498 | S.M.A.R.T. distinguishes between B<Pre-fail> and B<Old-age> |
||
| 499 | Attributes. An old disk will have more curling curves |
||
| 500 | because of degradation, especially for the B<Old-age> Attributes. |
||
| 501 | You should then backup more often, run more selftests[1] and prepare |
||
| 502 | the disks replacement. |
||
| 503 | |||
| 504 | B<Act directly>, if a B<Pre-fail> Attribute goes below threshold. |
||
| 505 | Immediately back-up your data and replace your hard disk drive. |
||
| 506 | A failure may be imminent. |
||
| 507 | |||
| 508 | [1] Consult the smartmontools manpages to learn about |
||
| 509 | offline tests and automated selftests with smartd. |
||
| 510 | Only with both activated, the values of the SMART-Attributes |
||
| 511 | reflect the overall state of the device. |
||
| 512 | |||
| 513 | Tutorials and articles about S.M.A.R.T. and smartmontools: |
||
| 514 | http://smartmontools.sourceforge.net/doc.html#tutorials |
||
| 515 | |||
| 516 | =head1 MAGIC MARKERS |
||
| 517 | |||
| 518 | #%# family=auto |
||
| 519 | #%# capabilities=autoconf suggest |
||
| 520 | |||
| 521 | =head1 CALL OPTIONS |
||
| 522 | |||
| 523 | B<none> |
||
| 524 | |||
| 525 | =over 2 |
||
| 526 | |||
| 527 | Fetches values if called without arguments: |
||
| 528 | |||
| 529 | E.g.: munin-run smart_hda |
||
| 530 | |||
| 531 | =back |
||
| 532 | |||
| 533 | B<config> |
||
| 534 | |||
| 535 | =over 2 |
||
| 536 | |||
| 537 | Prints plugins configuration. |
||
| 538 | |||
| 539 | E.g.: munin-run smart_hda config |
||
| 540 | |||
| 541 | =back |
||
| 542 | |||
| 543 | B<autoconf> |
||
| 544 | |||
| 545 | =over 2 |
||
| 546 | |||
| 547 | Tries to find smartctl and outputs value 'yes' for success, 'no' if not. |
||
| 548 | |||
| 549 | It's used by B<munin-node-configure> to see whether autoconfiguration is possible. |
||
| 550 | |||
| 551 | =back |
||
| 552 | |||
| 553 | B<suggest> |
||
| 554 | |||
| 555 | =over 2 |
||
| 556 | |||
| 557 | Outputs the list of device names, that it found plugged to the system. |
||
| 558 | |||
| 559 | B<munin-node-configure> uses this to build the service links for this wildcard-plugin. |
||
| 560 | |||
| 561 | =back |
||
| 562 | |||
| 563 | =head1 VERSION |
||
| 564 | |||
| 565 | Version 2.0 |
||
| 566 | |||
| 567 | =head1 BUGS |
||
| 568 | |||
| 569 | None known |
||
| 570 | |||
| 571 | =head1 AUTHOR |
||
| 572 | |||
| 573 | (C) 2004-2009 Nicolas Stransky <Nico@stransky.cx> |
||
| 574 | |||
| 575 | (C) 2008 Gabriele Pohl <contact@dipohl.de> |
||
| 576 | Reformatted existing documentation to POD-Style, added section Interpretation to the documentation. |
||
| 577 | |||
| 578 | =head1 LICENSE |
||
| 579 | |||
| 580 | GPLv2 (http://www.gnu.org/licenses/gpl-2.0.txt) |
||
| 581 | |||
| 582 | =cut |
||
| 583 | |||
| 584 | |||
| 585 | """ |
