Projet

Général

Profil

Paste
Télécharger au format
Statistiques
| Branche: | Révision:

root / plugins / snmp / snmp__brocade_ifs @ 17f78427

Historique | Voir | Annoter | Télécharger (14,2 ko)

1
#!/usr/bin/env python
2

    
3
"""
4
Munin plugin which reports selected counters regarding ports on a
5
Brocade SAN FC-switch. Only enabled ports are considered.
6

    
7
The counters shown:
8

    
9
enc_out:            Encoding errors outside FC frame.
10
                    Not as interesting as enc_out_per_mframe,
11
                    but it reflects the absolute values, instead
12
                    of being put in relation to the port's traffic.
13

    
14
enc_out_per_mframe: As above, but per million frames of traffic.
15
                    If there is a high number for this counter,
16
                    it could reflect:
17
                     - If there is also a high value for
18
                       rx_crcs for the port, then there is likely
19
                       a GBIC/SFP problem.
20
                     - If the value of rx_crcs for the port
21
                       is low, there is likely a cable/connector
22
                       problem.
23

    
24
rx_crcs:            CRC errors detected in received frames.
25
                    Together with enc_out errors, CRC errors
26
                    indicate a GBIC/SFP problem.
27

    
28
bits:               Number of bits transmitted(tx)/received(rx)
29
                    by the port. Inspecting this graph will help
30
                    determining if the port is saturated.
31

    
32
When symlinking to the plugin, indicate hostname like this:
33
snmp_HOSTNAME_brocade_ifs
34

    
35
# Special requirements:
36
#  - the pysnmp module; on RHEL 6 with EPEL 6, you may simply yum-
37
#    install it
38
"""
39

    
40
# Note: In the SNMP output from brocade switches, the interesting
41
# counters are named with numbers starting with 1, while the
42
# ports' real names on the box and in the administration interface
43
# start with 0. And there doesn't seem to be a way to map between
44
# ifDesc and the interesting crc and enc_out counters :-(
45
# Therefore, this plugin is Brocade-specific, and thus some
46
# manipulation of port numbers is performed for the output
47
# of this plugin (see comments marked ARGH below).
48

    
49
# TODOs:
50
#  - implement snmpconf?
51

    
52
# Munin magic markers
53
#%# family=manual
54
#%# capabilities=
55

    
56
# http://community.brocade.com/servlet/JiveServlet/download/5581-1453/portErrShow.pdf
57
# is useful when trying to understand counters on a Brocade switch.
58

    
59
# Author: Troels Arvin <tra@sst.dk>
60
# See http://troels.arvin.dk/code/munin/ for latest version.
61

    
62
# Only tested with Red Hat Enterprise Linux 5, currently.
63

    
64
# Released according to the "New BSD License" AKA the 3-clause
65
# BSD License:
66
# ====================================================================
67
# Copyright (c) 2011, Danish National Board of Health.
68
# All rights reserved.
69
#
70
# Redistribution and use in source and binary forms, with or without
71
# modification, are permitted provided that the following conditions are met:
72
#     * Redistributions of source code must retain the above copyright
73
#       notice, this list of conditions and the following disclaimer.
74
#     * Redistributions in binary form must reproduce the above copyright
75
#       notice, this list of conditions and the following disclaimer in the
76
#       documentation and/or other materials provided with the distribution.
77
#     * Neither the name of the  the Danish National Board of Health nor the
78
#       names of its contributors may be used to endorse or promote products
79
#       derived from this software without specific prior written permission.
80
#
81
# THIS SOFTWARE IS PROVIDED BY the Danish National Board of Health ''AS IS'' AND ANY
82
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
83
# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
84
# DISCLAIMED. IN NO EVENT SHALL the Danish National Board of Health BE LIABLE FOR ANY
85
# DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
86
# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
87
# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
88
# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
89
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
90
# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
91
# ====================================================================
92

    
93
# $Id: brocade_san_switch_ports_ 15443 2011-03-03 12:23:56Z tra $
94

    
95
import os, sys, re
96
from pysnmp.entity.rfc3413.oneliner import cmdgen
97

    
98
# For reference:
99
# SW-MIB::swFCPortLinkState   = .1.3.6.1.4.1.1588.2.1.1.1.6.2.1.6
100
# SW-MIB::swFCPortTxWords     = .1.3.6.1.4.1.1588.2.1.1.1.6.2.1.11
101
# SW-MIB::swFCPortRxWords     = .1.3.6.1.4.1.1588.2.1.1.1.6.2.1.12
102
# SW-MIB::swFCPortTxFrames    = .1.3.6.1.4.1.1588.2.1.1.1.6.2.1.13
103
# SW-MIB::swFCPortRxFrames    = .1.3.6.1.4.1.1588.2.1.1.1.6.2.1.14
104
# SW-MIB::swFCPortRxCrcs      = .1.3.6.1.4.1.1588.2.1.1.1.6.2.1.22
105
# SW-MIB::swFCPortRxEncOutFrs = .1.3.6.1.4.1.1588.2.1.1.1.6.2.1.26
106

    
107
# OID strings must be without leading dot in this script
108
# SW-MIB::swFCPortLinkState; used to find out which ports are enabled.
port_link_state_oidstr = '1.3.6.1.4.1.1588.2.1.1.1.6.2.1.6'

# Counter name -> OID string of the matching SW-MIB::swFCPort* column.
oidstrs = dict(
    rx_crcs='1.3.6.1.4.1.1588.2.1.1.1.6.2.1.22',
    enc_out='1.3.6.1.4.1.1588.2.1.1.1.6.2.1.26',
    tx_words='1.3.6.1.4.1.1588.2.1.1.1.6.2.1.11',
    rx_words='1.3.6.1.4.1.1588.2.1.1.1.6.2.1.12',
    tx_frames='1.3.6.1.4.1.1588.2.1.1.1.6.2.1.13',
    rx_frames='1.3.6.1.4.1.1588.2.1.1.1.6.2.1.14',
)
117

    
118
# Graph name -> text used in each graph's graph_info line.
# The keys of this dict are the graphs the plugin emits.
descriptions = {
    'rx_crcs':
        'the number of CRC errors detected for frames received',
    'enc_out':
        'encoding errors outside FC frame',
    'enc_out_per_mframe':
        'enc errors outside FC frame, per million frames of rx+tx traffic',
    'bits':
        'received(rx)/transmitted(tx) bits',
}

# Graph name -> RRD data source type announced in the config output.
rrd_types = {
    'rx_crcs': 'GAUGE',
    'enc_out': 'GAUGE',
    'enc_out_per_mframe': 'GAUGE',
    'bits': 'COUNTER',
}
131

    
132

    
133
# Some helper functions:
134

    
135
def bailout(msg):
    """Write msg, followed by a newline, to stderr and exit with status 1."""
    line = msg + "\n"
    sys.stderr.write(line)
    raise SystemExit(1)
138

    
139
def debug(msg):
    """Print msg to stdout with a 'Debug: ' prefix.

    The format string ends in a newline of its own, so the output is
    followed by an empty line.
    """
    text = 'Debug: %s\n' % msg
    print(text)
141

    
142
# Break OID-string into a tuple of elements
143
def oidstr2tuple(oidstr):
    """Return the dotted OID string (no leading dot) as a tuple of ints."""
    return tuple(int(element) for element in oidstr.split('.'))
146

    
147
# if object_name is 1.3.6.1.4.1.1588.2.1.1.1.6.2.1.26.1, return
148
#                   1.3.6.1.4.1.1588.2.1.1.1.6.2.1.26
149
def get_ObjectName_subtree(obj):
    """Return obj without its last element, i.e. the OID of the subtree
    that the object belongs to."""
    return obj[:-1]
151

    
152
# Convert SNMP objects to simpler structure, and cut off
153
# excessive return-value data (which bulkCmd may generate)
154
def varBindTable2plainDict(varBindTable):
    """Reduce a table of SNMP var-binds to a plain dict.

    varBindTable -- sequence of rows; only the first var-bind of each row
                    is used, and it is read as an (object name, value) pair.

    Returns a dict mapping the last element of each object name (the port
    number) to int(value). The subtree of the first row's object name
    decides which rows are wanted: rows whose object name lies in another
    subtree are skipped. An empty table yields an empty dict.
    """
    ret_dict = {}
    if not varBindTable:
        # Nothing to convert; avoids an IndexError on varBindTable[0] below.
        return ret_dict

    # The subtree is the object name minus its last element, e.g.
    # 1.3.6.1.4.1.1588.2.1.1.1.6.2.1.26.1 -> 1.3.6.1.4.1.1588.2.1.1.1.6.2.1.26
    wanted_subtree = varBindTable[0][0][0][:-1]
    for varBindTableRow in varBindTable:
        var_bind = varBindTableRow[0]
        object_name = var_bind[0]
        if object_name[:-1] != wanted_subtree:
            # Row from beyond the walked subtree
            continue
        ret_dict[object_name[-1]] = int(var_bind[1])
    return ret_dict
168

    
169

    
170

    
171
# The more interesting functions:
172

    
173
# Honor the munin-APIs "config" command
174
def print_config(host_name,enabled_ports):
    """Print the plugin's "config" output to stdout.

    host_name     -- printed on the host_name line and in the titles of
                     the totals graphs
    enabled_ports -- port numbers as numbered in the SNMP output; graph
                     names and titles show them reduced by one

    For every graph in descriptions, one multigraph section is printed per
    enabled port, followed by one totals section per graph.
    """
    print('host_name %s' % host_name)

    # Per-port
    for counter_type in descriptions:
        for portnum in enabled_ports:
            shown_port = portnum - 1  # ARGH: numbering base stuff
            lines = [
                'multigraph %s.port_%d' % (counter_type,shown_port),
                'graph_title Port %d %s' % (shown_port,counter_type),
                'graph_args --base 1000 -l 0',
                'graph_category san',
                'graph_info This graph shows the count of %s' % descriptions[counter_type],
            ]

            if counter_type != 'bits':
                lines += [
                    'graph_vlabel count',
                    'count.label count',
                    'count.type %s' % rrd_types[counter_type],
                ]
            else:
                # The max lines are initial-spike prevention:
                # 20Gbit/s is max FC speed
                lines += [
                    'graph_vlabel bits rx (-) / tx (+) per ${graph_period}',
                    'graph_order rx tx',
                    'rx.label rx',
                    'rx.graph no',
                    'rx.type %s' % rrd_types[counter_type],
                    'rx.max 20000000000',
                    'tx.label bps',
                    'tx.negative rx',
                    'tx.type %s' % rrd_types[counter_type],
                    'tx.max 20000000000',
                ]

            for line in lines:
                print(line)

    # Totals
    for counter_type in descriptions:
        lines = [
            'multigraph %s' % counter_type,
            'graph_title %s total %s' % (host_name,counter_type),
            'graph_args --base 1000 -l 0',
            'graph_category san',
            'graph_info This graph shows the total count of %s across all ports' % descriptions[counter_type],
        ]

        if counter_type != 'bits':
            lines += [
                'graph_vlabel count',
                'count.label count',
                'count.type %s' % rrd_types[counter_type],
            ]
        else:
            # The max lines are initial-spike prevention:
            # Assuming a max of 40 ports with each 20Gbit/s max
            lines += [
                'graph_vlabel bits rx (-) / tx (+) per ${graph_period}',
                'rx.label rx',
                'rx.graph no',
                'rx.type %s' % rrd_types[counter_type],
                'rx.max 800000000000',
                'tx.label bps',
                'tx.negative rx',
                'tx.type %s' % rrd_types[counter_type],
                'tx.max 800000000000',
            ]

        for line in lines:
            print(line)
224

    
225
# We don't care for disabled ports
226
def get_enabled_ports(host_name,community):
    """Return the list of port numbers whose link state is 1 (enabled).

    host_name -- SNMP agent to query
    community -- SNMP community string
    """
    link_states = get_port_values(host_name,community,port_link_state_oidstr)
    enabled = []
    for portnum in link_states:
        # status 1 means enabled
        if link_states[portnum] == 1:
            enabled.append(portnum)
    return enabled
230

    
231
# Talk to the SNMP agent performing the equivalent of an snmpwalk from
232
# the starting point indicated by the oid_start_tpl tuple.
233
# Handle potential errors.
234
def pull_values(host_name,community,oid_start_tpl):
    """Walk the SNMP agent on host_name (UDP port 161), starting at the OID
    given by the oid_start_tpl tuple, and return the resulting varBindTable.

    host_name     -- SNMP agent to query
    community     -- SNMP community string
    oid_start_tpl -- start OID as a tuple of ints

    Any failure (exception from pysnmp, errorIndication, errorStatus or an
    empty result) is reported through bailout(), which exits with status 1.
    """
    # Dotted form of the start OID; only used in error messages.
    oid_start_str = '.'.join([str(element) for element in oid_start_tpl])

    try:
        # NOTE(review): pysnmp documents these two integers as
        # nonRepeaters, maxRepetitions - 300, 0 looks swapped; confirm
        # against the pysnmp version in use before changing.
        errorIndication, errorStatus, errorIndex, varBindTable = cmdgen.CommandGenerator().bulkCmd(
            cmdgen.CommunityData('whatever', community),
            cmdgen.UdpTransportTarget((host_name, 161)),
            300, 0,
            oid_start_tpl
        )
    except Exception:
        # sys.exc_info() instead of "except ... as e" / "except ..., e":
        # this spelling is valid on both old Python 2 and Python 3.
        e = sys.exc_info()[1]
        bailout("Walking %s threw exception: %s" % (oid_start_str,str(e)))

    # errorIndication is checked first, ahead of the errorStatus carried
    # by a response.
    if errorIndication:
        bailout("Walking %s failed with errorIndication=%s" % (oid_start_str,errorIndication))
    if errorStatus:
        bailout("Walking %s failed: %s" % (oid_start_str,errorStatus.prettyPrint()))
    if len(varBindTable) < 1:
        bailout("Empty result from walk of %s" % oid_start_str)
    #debug('Pull result: %s' % varBindTable)
    return varBindTable
252

    
253
# Combine oidstr2tuple, pull_values and varBindTable2plainDict.
254
# Return dict of port-number => count
255
def get_port_values(host_name,community,oid_start_str):
    """Walk the subtree at oid_start_str (dotted, no leading dot) on
    host_name and return a dict of port-number => count."""
    oid_start_tpl = oidstr2tuple(oid_start_str)
    var_bind_table = pull_values(host_name,community,oid_start_tpl)
    return varBindTable2plainDict(var_bind_table)
259

    
260
# Initial sanity check
261
# sys.argv[0] is the plugin itself, so n_args == 2 means one real argument.
n_args = len(sys.argv)
if n_args > 2:
    # At most one arg expected; report the count without argv[0]
    bailout('%d arguments given - expecting at most one' % (n_args - 1))

# Make sure that multigraphs are supported
if 'MUNIN_CAP_MULTIGRAPH' not in os.environ:
    bailout('MUNIN_CAP_MULTIGRAPH not found in environment')

# Parse host_name from the name the plugin was called as (arg0)
called_as = os.path.basename(sys.argv[0])
regex_str = r'^snmp_(.+)_brocade_ifs'
match = re.match(regex_str, called_as)
if match:
    host_name = match.group(1)
else:
    bailout("Could not find host_name in plugin name '%s' - expecting snmp_HOSTNAME_brocade_ifs" % called_as)

# Determine SNMP community; 'public' unless set in the environment
community = os.environ.get('community', 'public')

# See how we were called: no argument means the same as 'fetch'.
# The argument is validated before any SNMP traffic is generated.
arg = 'fetch'
if n_args == 2:
    arg = sys.argv[1]
if arg not in ('config', 'fetch'):
    bailout("Unknown argument '%s'" % arg)

enabled_ports = get_enabled_ports(host_name,community)

if arg == 'config':
    print_config(host_name,enabled_ports)
    sys.exit(0)
300

    
301
# Prepare some structures
302
# One dict of port-number => count per counter listed in oidstrs
counters = {}
for counter_name in oidstrs:
    counters[counter_name] = get_port_values(host_name,community,oidstrs[counter_name])

# Running sums; filled in while the per-port values are printed
totals = {
    'rx_crcs'           : 0,
    'enc_out'           : 0,
    'enc_out_per_mframe': 0,
    'rx_frames'         : 0,
    'tx_frames'         : 0,
    'rx_bits'           : 0,
    'tx_bits'           : 0,
}

# special handling of enc_out per million frames
counters['enc_out_per_mframe'] = {}
for k in counters['rx_frames']:
    frames = counters['rx_frames'][k] + counters['tx_frames'][k]
    if frames > 0:
        # '//' keeps the result an integer on Python 3 as well; on
        # Python 2 it equals the plain '/' on ints.
        counters['enc_out_per_mframe'][k] = 1000000*counters['enc_out'][k] // frames
    else:
        # No traffic on the port: avoid dividing by zero
        counters['enc_out_per_mframe'][k] = 0
326

    
327
#debug('counters: ' + str(counters))
328

    
329

    
330
# Handle the default case (fetch)
331

    
332
# Per-port values
333
for port in enabled_ports:
    shown_port = port - 1  # ARGH: numbering base stuff
    for graph in descriptions:
        print('multigraph %s.port_%d' % (graph,shown_port))

        if graph != 'bits':
            # Graphs without an in/out aspect carry a single value.
            # NOTE(review): for enc_out_per_mframe this adds up per-port
            # ratios; presumably a ratio over the summed frame counts was
            # intended (totals has unused rx_frames/tx_frames) - confirm.
            value = counters[graph][port]
            print('count.value %d' % value)
            totals[graph] += value
            continue

        # The bits graph has an rx and a tx side. Words are converted to
        # bits: each word consists of four 10-bit units.
        rx_bits = counters['rx_words'][port] * 40
        tx_bits = counters['tx_words'][port] * 40
        print('rx.value %d' % rx_bits)
        print('tx.value %d' % tx_bits)
        totals['rx_bits'] += rx_bits
        totals['tx_bits'] += tx_bits

# Totals
for graph in descriptions:
    print('multigraph %s' % graph)

    if graph == 'bits':
        for direction in ('rx', 'tx'):
            print('%s.value %d' % (direction, totals['%s_bits' % direction]))
    else:
        print('count.value %d' % totals[graph])