root / plugins / snmp / snmp__brocade_ifs @ 17f78427
Historique | Voir | Annoter | Télécharger (14,2 ko)
| 1 |
#!/usr/bin/env python |
|---|---|
| 2 |
|
| 3 |
""" |
| 4 |
Munin plugin which reports selected counters regarding ports on a |
| 5 |
Brocade SAN FC-switch. Only enabled ports are considered. |
| 6 |
|
| 7 |
The counters shown: |
| 8 |
|
| 9 |
enc_out: Encoding errors outside FC frame. |
| 10 |
Not as interesting as enc_out_per_mframe, |
| 11 |
but it reflects the absolute values, instead |
| 12 |
of being put in relation to the port's traffic. |
| 13 |
|
| 14 |
enc_out_per_mframe: As above, but per million frames of traffic. |
| 15 |
If there is a high number for this counter, |
| 16 |
it could reflect: |
| 17 |
- If there is also a high value for |
| 18 |
rx_crcs for the port, then there is likely |
| 19 |
a GBIC/SFP problem. |
| 20 |
- If the value of rx_crcs for the port |
| 21 |
is low, there is likely a cable/connector |
| 22 |
problem. |
| 23 |
|
| 24 |
rx_crcs: CRC errors detected in received frames. |
| 25 |
Together with enc_out errors, CRC errors |
| 26 |
indicate a GBIC/SFP problem. |
| 27 |
|
| 28 |
bits: Number of bits transmitted(tx)/received(rx) |
| 29 |
by the port. Inspecting this graph will help |
| 30 |
determining if the port is saturated. |
| 31 |
|
| 32 |
When symlinking to the plugin, indicate hostname like this: |
| 33 |
snmp_HOSTNAME_brocade_ifs |
| 34 |
|
| 35 |
# Special requirements: |
| 36 |
# - the pysnmp module; on RHEL 6 with EPEL 6, you may simply yum- |
| 37 |
# install it |
| 38 |
""" |
| 39 |
|
| 40 |
# Note: In the SNMP output from brocade switches, the interesting |
| 41 |
# counters are named with numbers starting with 1, while the |
| 42 |
# ports' real names on the box and in the administration interface |
| 43 |
# start with 0. And there doesn't seem to be a way to map between |
| 44 |
# ifDesc and the interesting crc and enc_out counters :-( |
| 45 |
# Therefore, this plugin is Brocade-specific, and thus some |
| 46 |
# manipulation of port numbers is performed for the output |
| 47 |
# of this plugin (see comments marked ARGH below). |
| 48 |
|
| 49 |
# TODOs: |
| 50 |
# - implement snmpconf? |
| 51 |
|
| 52 |
# Munin magic markers |
| 53 |
#%# family=manual |
| 54 |
#%# capabilities= |
| 55 |
|
| 56 |
# http://community.brocade.com/servlet/JiveServlet/download/5581-1453/portErrShow.pdf |
| 57 |
# is useful when trying to understand counters on a Brocade switch. |
| 58 |
|
| 59 |
# Author: Troels Arvin <tra@sst.dk> |
| 60 |
# See http://troels.arvin.dk/code/munin/ for latest version. |
| 61 |
|
| 62 |
# Only tested with Red Hat Enterprise Linux 5, currently. |
| 63 |
|
| 64 |
# Released according to the "New BSD License" AKA the 3-clause |
| 65 |
# BSD License: |
| 66 |
# ==================================================================== |
| 67 |
# Copyright (c) 2011, Danish National Board of Health. |
| 68 |
# All rights reserved. |
| 69 |
# |
| 70 |
# Redistribution and use in source and binary forms, with or without |
| 71 |
# modification, are permitted provided that the following conditions are met: |
| 72 |
# * Redistributions of source code must retain the above copyright |
| 73 |
# notice, this list of conditions and the following disclaimer. |
| 74 |
# * Redistributions in binary form must reproduce the above copyright |
| 75 |
# notice, this list of conditions and the following disclaimer in the |
| 76 |
# documentation and/or other materials provided with the distribution. |
| 77 |
# * Neither the name of the the Danish National Board of Health nor the |
| 78 |
# names of its contributors may be used to endorse or promote products |
| 79 |
# derived from this software without specific prior written permission. |
| 80 |
# |
| 81 |
# THIS SOFTWARE IS PROVIDED BY the Danish National Board of Health ''AS IS'' AND ANY |
| 82 |
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED |
| 83 |
# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE |
| 84 |
# DISCLAIMED. IN NO EVENT SHALL the Danish National Board of Health BE LIABLE FOR ANY |
| 85 |
# DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES |
| 86 |
# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; |
| 87 |
# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND |
| 88 |
# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |
| 89 |
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS |
| 90 |
# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
| 91 |
# ==================================================================== |
| 92 |
|
| 93 |
# $Id: brocade_san_switch_ports_ 15443 2011-03-03 12:23:56Z tra $ |
| 94 |
|
| 95 |
import os, sys, re |
| 96 |
from pysnmp.entity.rfc3413.oneliner import cmdgen |
| 97 |
|
| 98 |
# For reference: |
| 99 |
# SW-MIB::swFCPortLinkState = .1.3.6.1.4.1.1588.2.1.1.1.6.2.1.6 |
| 100 |
# SW-MIB::swFCPortTxWords = .1.3.6.1.4.1.1588.2.1.1.1.6.2.1.11 |
| 101 |
# SW-MIB::swFCPortRxWords = .1.3.6.1.4.1.1588.2.1.1.1.6.2.1.12 |
| 102 |
# SW-MIB::swFCPortTxFrames = .1.3.6.1.4.1.1588.2.1.1.1.6.2.1.13 |
| 103 |
# SW-MIB::swFCPortRxFrames = .1.3.6.1.4.1.1588.2.1.1.1.6.2.1.14 |
| 104 |
# SW-MIB::swFCPortRxCrcs = .1.3.6.1.4.1.1588.2.1.1.1.6.2.1.22 |
| 105 |
# SW-MIB::swFCPortRxEncOutFrs = .1.3.6.1.4.1.1588.2.1.1.1.6.2.1.26 |
| 106 |
|
| 107 |
# OID strings must be without leading dot in this script |
| 108 |
# Subtree holding the link state of every port (walked to find enabled ports).
port_link_state_oidstr = '1.3.6.1.4.1.1588.2.1.1.1.6.2.1.6'

# Counter name => OID subtree that is walked to obtain the per-port values.
oidstrs = {
    'rx_crcs': '1.3.6.1.4.1.1588.2.1.1.1.6.2.1.22',
    'enc_out': '1.3.6.1.4.1.1588.2.1.1.1.6.2.1.26',
    'tx_words': '1.3.6.1.4.1.1588.2.1.1.1.6.2.1.11',
    'rx_words': '1.3.6.1.4.1.1588.2.1.1.1.6.2.1.12',
    'tx_frames': '1.3.6.1.4.1.1588.2.1.1.1.6.2.1.13',
    'rx_frames': '1.3.6.1.4.1.1588.2.1.1.1.6.2.1.14',
}

# Graph name => human readable text used in the graph_info lines.
# The keys of this dict also define which graphs the plugin emits.
descriptions = {
    'rx_crcs': 'the number of CRC errors detected for frames received',
    'enc_out': 'encoding errors outside FC frame',
    'enc_out_per_mframe': 'enc errors outside FC frame, per million frames of rx+tx traffic',
    'bits': 'received(rx)/transmitted(tx) bits',
}

# Graph name => RRD data source type announced in the "config" output.
rrd_types = {
    'rx_crcs': 'GAUGE',
    'enc_out': 'GAUGE',
    'enc_out_per_mframe': 'GAUGE',
    'bits': 'COUNTER',
}
| 131 |
|
| 132 |
|
| 133 |
# Some helper functions: |
| 134 |
|
| 135 |
def bailout(msg):
    """Write msg (plus a newline) to stderr and terminate with exit status 1."""
    line = msg + "\n"
    sys.stderr.write(line)
    raise SystemExit(1)
| 138 |
|
| 139 |
def debug(msg):
    """Print msg on stdout with a 'Debug: ' prefix, followed by an empty line."""
    text = 'Debug: %s\n' % msg
    print(text)
|
| 141 |
|
| 142 |
# Break OID-string in to a tuple of elements |
| 143 |
def oidstr2tuple(oidstr):
    """Convert a dotted OID string into a tuple of integers.

    Both '1.3.6.1' and the leading-dot form '.1.3.6.1' are accepted and
    yield (1, 3, 6, 1).

    Raises ValueError if any element is not a decimal integer.
    """
    # A single leading dot only marks the OID as absolute; without this
    # step int('') would fail on the empty first element.
    if oidstr.startswith('.'):
        oidstr = oidstr[1:]
    return tuple(int(element) for element in oidstr.split('.'))
| 146 |
|
| 147 |
# if object_name is 1.3.6.1.4.1.1588.2.1.1.1.6.2.1.26.1, return |
| 148 |
# 1.3.6.1.4.1.1588.2.1.1.1.6.2.1.26 |
| 149 |
def get_ObjectName_subtree(obj):
    """Return obj without its last element (the parent subtree of an OID)."""
    return obj[:-1]
| 151 |
|
| 152 |
# Convert SNMP objects to simpler structure, and cut off |
| 153 |
# excessive return-value data (which bulkCmd may generate) |
| 154 |
def varBindTable2plainDict(varBindTable):
    """Flatten an SNMP varBindTable into a dict of port-number => count.

    Each row is expected to hold a (name, value) pair as its first element.
    The subtree of the first row's name (the name minus its last element)
    is taken as the wanted subtree; rows whose name lies in another subtree
    are skipped. The last element of the name is used as the port number
    and the value is converted with int().

    An empty table yields an empty dict.
    """
    ret_dict = {}
    if not varBindTable:
        # Nothing was returned, so there is no first row to derive the
        # wanted subtree from.
        return ret_dict

    first_name = varBindTable[0][0][0]
    wanted_subtree = first_name[:-1]
    for varBindTableRow in varBindTable:
        name = varBindTableRow[0][0]
        if name[:-1] != wanted_subtree:
            # Row belongs to a different subtree than the one asked for
            continue
        ret_dict[name[-1]] = int(varBindTableRow[0][1])
    return ret_dict
| 168 |
|
| 169 |
|
| 170 |
|
| 171 |
# The more interesting functions: |
| 172 |
|
| 173 |
# Honor the munin-APIs "config" command |
| 174 |
def print_config(host_name,enabled_ports):
    """Print the munin "config" output for all graphs of this plugin.

    First one multigraph section per (counter type, enabled port) is
    printed, then one section per counter type holding the totals.
    Port numbers are printed as portnum-1 (ARGH: numbering base stuff).
    """

    def print_common(info_line):
        # Lines shared by every graph, after multigraph/graph_title
        print('graph_args --base 1000 -l 0')
        print('graph_category san')
        print(info_line)

    def print_fields(counter_type, max_value, with_graph_order):
        # Data source definitions: an rx/tx pair for 'bits', otherwise a
        # single 'count' field.
        if counter_type != 'bits':
            print('graph_vlabel count')
            print('count.label count')
            print('count.type %s' % rrd_types[counter_type])
            return
        print('graph_vlabel bits rx (-) / tx (+) per ${graph_period}')
        if with_graph_order:
            print('graph_order rx tx')
        print('rx.label rx')
        print('rx.graph no')
        print('rx.type %s' % rrd_types[counter_type])
        print('rx.max %s' % max_value)  # initial-spike prevention
        print('tx.label bps')
        print('tx.negative rx')
        print('tx.type %s' % rrd_types[counter_type])
        print('tx.max %s' % max_value)  # initial-spike prevention

    print('host_name %s' % host_name)

    # Per-port graphs
    for counter_type in descriptions:
        for portnum in enabled_ports:
            shown_port = portnum-1  # ARGH: numbering base stuff
            print('multigraph %s.port_%d' % (counter_type,shown_port))
            print('graph_title Port %d %s' % (shown_port,counter_type))
            print_common('graph_info This graph shows the count of %s' % descriptions[counter_type])
            # 20Gbit/s is max FC speed
            print_fields(counter_type, '20000000000', True)

    # Totals across all ports
    for counter_type in descriptions:
        print('multigraph %s' % counter_type)
        print('graph_title %s total %s' % (host_name,counter_type))
        print_common('graph_info This graph shows the total count of %s across all ports' % descriptions[counter_type])
        # Assuming a max of 40 ports with each 20Gbit/s max
        print_fields(counter_type, '800000000000', False)
|
| 224 |
|
| 225 |
# We don't care for disabled ports |
| 226 |
def get_enabled_ports(host_name,community):
    """Return the list of port numbers whose link state value equals 1."""
    states_by_port = get_port_values(host_name,community,port_link_state_oidstr)
    enabled = []
    for portnum in states_by_port:
        # status 1 means enabled
        if states_by_port[portnum] == 1:
            enabled.append(portnum)
    return enabled
| 230 |
|
| 231 |
# Talk to the SNMP agent performing the equivalent of an snmpwalk from |
| 232 |
# the starting point indicated by the oid_start_tpl tuple. |
| 233 |
# Handle potential errors. |
| 234 |
def pull_values(host_name,community,oid_start_tpl):
    """Walk an SNMP subtree on host_name and return the raw varBindTable.

    host_name     -- SNMP agent to query (UDP port 161)
    community     -- SNMP community string
    oid_start_tpl -- starting OID as a tuple of integers

    Any failure (exception from pysnmp, errorIndication, errorStatus or an
    empty result) is reported through bailout(), which terminates the
    plugin with exit status 1.
    """
    # Dotted form of the OID; only used to make the error messages readable
    oid_start_str = '.'.join([str(sub_id) for sub_id in oid_start_tpl])

    try:
        # NOTE(review): pysnmp documents this call as
        # bulkCmd(authData, transportTarget, nonRepeaters, maxRepetitions,
        # *varNames), so "300, 0" looks swapped. Left unchanged; confirm
        # against a real switch before altering it.
        errorIndication, errorStatus, errorIndex, varBindTable = cmdgen.CommandGenerator().bulkCmd(
            cmdgen.CommunityData('whatever', community),
            cmdgen.UdpTransportTarget((host_name, 161)),
            300, 0,
            (oid_start_tpl)
        )
    except Exception as e:
        bailout("Walking %s threw exception: %s" % (oid_start_str,str(e)))

    if errorIndication:
        bailout("Walking %s failed with errorIndication=%s" % (oid_start_str,errorIndication))
    if errorStatus:
        bailout("Walking %s failed: %s" % (oid_start_str,errorStatus.prettyPrint()))
    if len(varBindTable) < 1:
        bailout("Empty result from walk of %s" % oid_start_str)
    return varBindTable
| 252 |
|
| 253 |
# Combine oidstr2tuple, pull_values and varBindTable2plainDict. |
| 254 |
# Return dict of port-number => count |
| 255 |
def get_port_values(host_name,community,oid_start_str):
    """Walk the subtree named by oid_start_str; return port-number => count."""
    start_oid = oidstr2tuple(oid_start_str)
    raw_table = pull_values(host_name,community,start_oid)
    return varBindTable2plainDict(raw_table)
| 259 |
|
| 260 |
# Initial sanity check |
| 261 |
n_args = len(sys.argv)
if n_args > 2:
    # At most one arg expected. sys.argv[0] is the plugin itself, so it is
    # not counted in the number reported to the user.
    bailout('%d arguments given - expecting only one' % (n_args - 1))

# Make sure that multigraphs are supported
if 'MUNIN_CAP_MULTIGRAPH' not in os.environ:
    bailout('MUNIN_CAP_MULTIGRAPH not found in environment')

# Parse host_name from the name the plugin was invoked under
# (snmp_HOSTNAME_brocade_ifs)
called_as = os.path.basename(sys.argv[0])
regex_str = r'^snmp_(.+)_brocade_ifs'
match = re.match(regex_str, called_as)
if match:
    host_name = match.group(1)
else:
    bailout('Missing host_name and/or counter type')

# Determine SNMP community, falling back to 'public' when it is not set
community = os.environ.get('community', 'public')

# See how we were called; no argument means the same as 'fetch'.
# The argument is validated before any SNMP traffic is generated.
if n_args == 2:
    arg = sys.argv[1]
else:
    arg = 'fetch'
if arg not in ('config', 'fetch'):
    bailout("Unknown argument '%s'" % arg)

enabled_ports = get_enabled_ports(host_name,community)

if arg == 'config':
    print_config(host_name,enabled_ports)
    sys.exit(0)

# Prepare some structures: one SNMP walk per raw counter
counters = {}
for counter_name in ('rx_crcs', 'enc_out', 'rx_frames', 'tx_frames', 'rx_words', 'tx_words'):
    counters[counter_name] = get_port_values(host_name,community,oidstrs[counter_name])

totals = {
    'rx_crcs': 0,
    'enc_out': 0,
    'enc_out_per_mframe': 0,
    'rx_frames': 0,
    'tx_frames': 0,
    'rx_bits': 0,
    'tx_bits': 0,
}

# special handling of enc_out per million frames
counters['enc_out_per_mframe'] = {}
for k in counters['rx_frames']:
    frames = counters['rx_frames'][k] + counters['tx_frames'][k]
    if frames > 0:
        # Floor division keeps the value an integer on Python 2 and 3 alike
        counters['enc_out_per_mframe'][k] = 1000000*counters['enc_out'][k] // frames
    else:
        counters['enc_out_per_mframe'][k] = 0

# Handle the default case (fetch)

# Per-port values
for portnum in enabled_ports:
    for counter_type in descriptions:
        print('multigraph %s.port_%d' % (counter_type,portnum-1)) # ARGH: numbering base stuff

        # For some of the graphs, there is an in/out aspect, for others
        # they are combined or not applicable
        if counter_type == 'bits':
            rx_value = counters['rx_words'][portnum]
            tx_value = counters['tx_words'][portnum]
            rx_bits = rx_value * 40 # Each word consists of four
            tx_bits = tx_value * 40 # 10-bit units.
            print('rx.value %d' % rx_bits)
            print('tx.value %d' % tx_bits)
            totals['rx_bits'] += rx_bits
            totals['tx_bits'] += tx_bits
        else:
            print('count.value %d' % counters[counter_type][portnum])
            totals[counter_type] += counters[counter_type][portnum]

# Totals
for counter_type in descriptions:
    print('multigraph %s' % (counter_type))

    # For some of the graphs, there is an in/out aspect, for others
    # they are combined or not applicable
    if counter_type == 'bits':
        print('rx.value %d' % totals['rx_bits'])
        print('tx.value %d' % totals['tx_bits'])
    else:
        print('count.value %d' % totals[counter_type])
|
