Projet

Général

Profil

Paste
Télécharger au format
Statistiques
| Branche: | Révision:

root / plugins / nginx / nginx_upstream_multi_ @ 9ce70486

Historique | Voir | Annoter | Télécharger (13,7 ko)

1 6c765698 majesty
#!/usr/bin/env python
2
# -*- coding: utf-8 -*-
3
# vim: set fileencoding=utf-8
4
#
5
# Munin plugin to monitor requests number, cache statuses, http status codes and average request times of
6
# specified nginx upstreams.
7
#
8
# Copyright Igor Borodikhin
9
#
10
# License : GPLv3
11
#
12
# Configuration parameters:
13
# env.graphs - which graphs to produce (optional, list of graphs separated by spaces, default - cache http time request)
14
# env.log - log file path (mandatory, ex.: /var/log/nginx/upstream.log)
15
# env.upstream - list of upstreams to monitor (mandatory, including port numbers separated by space, ex.: 10.0.0.1:80 10.0.0.2:8080)
16
# env.statuses - list of http status codes to monitor (optional, default - all statuses, ex.: 200 403 404 410 500 502)
17
# env.percentiles - which percentiles to draw on time graphs (optional, list of percentiles separated by spaces, default - 80)
18
#
19
# ## Installation
20
# Copy file to directory /usr/share/munin/plugins/ and create symbolic link(s) for each log file you wish to monitor.
21
#
22
# Specify log_format at /etc/nginx/conf.d/upstream.conf:
23
# log_format upstream "ua=[$upstream_addr] ut=[$upstream_response_time] us=[$upstream_status] cs=[$upstream_cache_status]"
24
#
25
# Use it in your site configuration (/etc/nginx/sites-enabled/anything.conf):
26
# access_log /var/log/nginx/upstream.log upstream;
27
#
28 6efaef76 Igor Borodikhin
# Attention! Because munin-node does not have read permission for nginx log files we need to run it as root.
29
#
30 2eb7552c Igor Borodikhin
# And specify some options in /etc/munin/plugin-conf.d/munin-node:
31 6c765698 majesty
#
32
#     [nginx_upstream_multi_upstream]
33 6efaef76 Igor Borodikhin
#     user root
34 6c765698 majesty
#     env.graphs cache http time request
35
#     env.log /var/log/nginx/upstream.log
36
#     env.upstream 10.0.0.1:80 10.0.0.2:8080 unix:/tmp/upstream3
37
#     env.statuses 200 403 404 410 500 502
38
#     env.percentiles 50 80
39
#
40
#%# family=contrib
41
42
import os, sys, re, copy, math
43
from time import time
44
45
# How we've been called: the symlink name selects the site to monitor.
progName = os.path.basename(sys.argv[0])

# Where to store plugin state (None when munin did not export it).
stateDir = os.environ.get("MUNIN_PLUGSTATE")

# Which site configuration we should use (suffix of the symlink name).
siteName = progName[len("nginx_upstream_multi_"):]

# Log path (env.log), falling back to nginx's default access log.
logPath = os.environ.get("log", "/var/log/nginx/access.log")
63
64
# HTTP status code -> human-readable title; titles become graph field labels.
# (Fixed label typo: "Usupported media type" -> "Unsupported media type".)
httpStatusString = ("100:Continue;101:Switching protocols;102:Processing;200:OK;201:Created;202:Accepted;"
"203:Non-Authoritative Information;204:No content;205:Reset content;206:Partial content;207:Multi-status;"
"226:IM used;300:Multiple choices;301:Moved permanently;302:Moved temporarily;303:See other;304:Not modified;"
"305:Use proxy;307:Temporary redirect;400:Bad request;401:Unauthorized;402:Payment required;403:Forbidden;"
"404:Not found;405:Method not allowed;406:Not acceptable;407:Proxy Authentication Required;408:Request timeout;"
"409:Conflict;410:Gone;411:Length required;412:Precondition failed;413:Request entity too large;"
"414:Request URI too large;415:Unsupported media type;416:Request range not satisfiable;417:Expectation failed;"
"422:Unprocessable entity;423:Locked;424:Failed dependency;425:Unordered collection;426:Upgrade required;"
"449:Retry with;456:Unrecoverable error;500:Internal server error;501:Not implemented;502:Bad gateway;"
"503:Service unavailable;504:Gateway timeout;505:HTTP version not supported;506:Variant also negotiates;"
"507:Insufficient storage;508:Loop detected;509:Bandwidth limit exceeded;510:Not extended")

# Optional env.statuses filter: when set, only the listed codes are monitored.
if "statuses" in os.environ:
    statuses = os.environ["statuses"].split()
else:
    statuses = []

# Per-status accounting table, honoring the optional filter.
httpStatusList = {}
for statusString in httpStatusString.split(";"):
    [code, title] = statusString.split(":")
    # An empty filter means "monitor every status".
    if not statuses or code in statuses:
        httpStatusList[code] = {
            "title" : title,
            "requests" : 0
        }

# Possible values of nginx's $upstream_cache_status variable.
cacheStatusList = { "MISS" : 0, "BYPASS" : 0, "EXPIRED" : 0, "UPDATING" : 0, "STALE" : 0, "HIT" : 0 }
92
93
# Parse upstreams (mandatory env.upstream, space-separated host:port entries).
if "upstream" not in os.environ:
    raise Exception("No upstreams specified")

upstreams = {}
for upstreamAddr in os.environ["upstream"].split():
    # Each upstream gets its own counters and deep copies of the status tables.
    upstreams[upstreamAddr] = {
        "requests" : 0,
        "time"     : 0,
        "times"    : [],
        "cache"    : copy.deepcopy(cacheStatusList),
        "http"     : copy.deepcopy(httpStatusList)
    }

# Which percentiles to draw on the time graphs (env.percentiles, default 80).
if "percentiles" in os.environ:
    percentiles = os.environ["percentiles"].split()
else:
    percentiles = [80]

# Which graphs to produce (env.graphs, default: all of them).
if "graphs" in os.environ:
    graphs_enabled = os.environ["graphs"].split()
else:
    graphs_enabled = ["cache", "http", "time", "request"]
118
119
now = int(time())

# The state file's content is the last processed byte offset; its mtime is the
# timestamp of the previous run.
lastBytePath = "%s/nginx_upstream_multi_%s_lastByte.txt" % (stateDir, siteName)
try:
    lastRun = os.path.getmtime(lastBytePath)
except OSError:
    # First run (or state wiped): pretend we just ran so all rates start at 0.
    lastRun = now
126
127
128
def sanitize(string):
    # Munin fieldnames must not contain '.', ':', '/' or '-'; map them to '_'.
    return re.sub(r"[.:/-]", "_", string)
130
131
if len(sys.argv) == 2 and sys.argv[1] == "config":
132
    # Parent graph declaration
133
    print "multigraph nginx_upstream_multi_%s" % siteName.replace(".", "_")
134
    print "graph_title Requests number"
135
    print "graph_vlabel rps"
136 3c98d069 dipohl
    print "graph_category webserver"
137 6c765698 majesty
    for upstream in upstreams.keys():
138
        print "us%s_requests.label %s" % (sanitize(upstream), upstream)
139
140
    # Requests graph declaration
141
    if "request" in graphs_enabled:
142
        for upstream in upstreams.keys():
143
            print ""
144
            print "multigraph nginx_upstream_multi_%s.%s_requests" % (sanitize(siteName), sanitize(upstream))
145
            print "graph_title Requests number - %s" % upstream
146
            print "graph_vlabel rps"
147 3c98d069 dipohl
            print "graph_category webserver"
148 6c765698 majesty
            print "us%s_requests.label %s" % (sanitize(upstream), upstream)
149
            print ""
150
151
    # Times graph declaration
152
    if "time" in graphs_enabled:
153
        for upstream in upstreams.keys():
154
            print ""
155
            print "multigraph nginx_upstream_multi_%s.%s_times" % (sanitize(siteName), sanitize(upstream))
156
            print "graph_title Request time - %s" % upstream
157
            print "graph_vlabel sec."
158 3c98d069 dipohl
            print "graph_category webserver"
159 6c765698 majesty
            print "us%s_times.label average" % (sanitize(upstream))
160
            for percentile in percentiles:
161
                print "us%s_times_percentile_%s.label %s-percentile" % (sanitize(upstream), percentile, percentile)
162
            print ""
163
164
    # HTTP Status codes graph declaration
165
    if "http" in graphs_enabled:
166
        for upstream in upstreams.keys():
167
            print ""
168
            print "multigraph nginx_upstream_multi_%s.%s_statuses" % (sanitize(siteName), sanitize(upstream))
169
            print "graph_title HTTP - %s" % upstream
170
            print "graph_vlabel rps"
171 3c98d069 dipohl
            print "graph_category webserver"
172 6c765698 majesty
            keylist = httpStatusList.keys()
173
            keylist.sort()
174
            for status in keylist:
175
                print "http%s_%s_status.label %s - %s" % (status, sanitize(upstream), status, httpStatusList[status]["title"])
176
            print ""
177
178
    # Cache status graph declaration
179
    if "cache" in graphs_enabled:
180
        for upstream in upstreams.keys():
181
            print ""
182
            print "multigraph nginx_upstream_multi_%s.%s_cache" % (sanitize(siteName), sanitize(upstream))
183
            print "graph_title Cache - %s" % upstream
184
            print "graph_vlabel rps"
185 3c98d069 dipohl
            print "graph_category webserver"
186 6c765698 majesty
            for status in cacheStatusList:
187
                print "us%s_%s_cache.label %s" % (sanitize(status), sanitize(upstream), status)
188
            print ""
189
else:
190
    timeElapsed = now - lastRun
191
192
    lastByteHandle = None
193
194
    try:
195
        lastByteHandle = open(lastBytePath, "r")
196
        lastByte = int(lastByteHandle.read())
197
    except Exception:
198
        lastByte = 0
199
200
    if lastByteHandle != None:
201
        lastByteHandle.close()
202
203
    try:
204
        logHandle = open(logPath, "r")
205 6efaef76 Igor Borodikhin
    except Exception as e:
206
        print "Log file %s not readable: %s" % (logPath, e.strerror)
207 6c765698 majesty
        sys.exit(1)
208
209
    try:
210
        logSize = int(os.path.getsize(logPath))
211
    except ValueError:
212
        logSize = 0
213
214
    if logSize < lastByte:
215
        lastByte = 0
216
217
    regExp = re.compile(r"ua=\[(.*?)\]\s+ut=\[(.*?)\]\s+us=\[(.*?)\]\s+cs=\[(.*?)\]")
218
219
    logHandle.seek(lastByte)
220
    def splitUpstreamField(rawValue):
        # nginx joins values from multiple upstreams with "," (next upstream
        # tried) or " : " (next server group); collapse both to single spaces
        # and split into one entry per upstream.
        rawValue = rawValue.replace(",", " ")
        rawValue = rawValue.replace(" : ", " ")
        return re.sub(r"\s+", " ", rawValue).split()

    for line in logHandle:
        match = regExp.search(line)
        if match:
            # One request may have touched several upstreams; index i of each
            # list below belongs to addresses[i].
            # FIX: the original named these 'time' and 'statuses', shadowing
            # the imported time() and the env.statuses filter list.
            addresses    = splitUpstreamField(match.group(1))
            entryTimes   = splitUpstreamField(match.group(2))
            entryStatus  = splitUpstreamField(match.group(3))
            entryCaches  = splitUpstreamField(match.group(4))

            for index, uAddress in enumerate(addresses):
                if uAddress not in upstreams:
                    continue

                # FIX: bounds-check entryTimes like the other fields; the
                # original indexed it unconditionally and could IndexError.
                uTime = 0
                if index < len(entryTimes):
                    try:
                        uTime = float(entryTimes[index])
                    except ValueError:
                        uTime = 0

                uStatus = entryStatus[index] if index < len(entryStatus) else "-"
                uCache  = entryCaches[index] if index < len(entryCaches) else "-"

                if uAddress != "-":
                    upstreams[uAddress]["requests"] += 1

                # uTime is always a float here; "-" entries parse to 0 and are
                # still recorded, matching the original behavior (its
                # `uTime != "-"` guard could never be false).
                upstreams[uAddress]["time"] += uTime
                upstreams[uAddress]["times"].append(uTime)

                if uStatus != "-" and uStatus in upstreams[uAddress]["http"]:
                    upstreams[uAddress]["http"][uStatus]["requests"] += 1
                # FIX: cache statuses outside cacheStatusList (e.g.
                # REVALIDATED) previously raised KeyError; skip them instead.
                if uCache != "-" and uCache in upstreams[uAddress]["cache"]:
                    upstreams[uAddress]["cache"][uCache] += 1
    try:
281
        lastByteHandle = open(lastBytePath, "w")
282
        lastByteHandle.write(str(logHandle.tell()))
283
        lastByteHandle.close()
284 6efaef76 Igor Borodikhin
    except Exception as e:
285
        print e.strerror
286 6c765698 majesty
        sys.exit(1)
287
288
    logHandle.close()
289
290
    # Parent graph data
291
    for upstream in upstreams.keys():
292
        value = 0
293
        if timeElapsed > 0:
294
            value = upstreams[upstream]["requests"] / timeElapsed
295
296
        print "us%s_requests.value %s" % (sanitize(upstream), value)
297
298
    # Requests graph data
299
    if "request" in graphs_enabled:
300
        for upstream in upstreams.keys():
301
            print ""
302
            print "multigraph nginx_upstream_multi_%s.%s_requests" % (sanitize(siteName), sanitize(upstream))
303
304
            value = 0
305
            if timeElapsed > 0:
306
                value = upstreams[upstream]["requests"] / timeElapsed
307
308
            print "us%s_requests.value %s" % (sanitize(upstream), value)
309
            print ""
310
311
    # Times graph data
312
    if "time" in graphs_enabled:
313
        for upstream in upstreams.keys():
314
            uTime = 0
315
            if upstreams[upstream]["requests"] > 0:
316
                uTime = upstreams[upstream]["time"] / upstreams[upstream]["requests"]
317
                upstreams[upstream]["times"].sort()
318
            print ""
319
            print "multigraph nginx_upstream_multi_%s.%s_times" % (sanitize(siteName), sanitize(upstream))
320
            print "us%s_times.value %s" % (sanitize(upstream), uTime)
321
            for percentile in percentiles:
322
                percentileValue = 0
323
                if upstreams[upstream]["requests"] > 0:
324
                    uTime = upstreams[upstream]["time"] / upstreams[upstream]["requests"]
325
                    percentileKey = int(percentile) * len(upstreams[upstream]["times"]) / 100
326
                    if len(upstreams[upstream]["times"])%2 > 0:
327
                        low = int(math.floor(percentileKey))
328
                        high = int(math.ceil(percentileKey))
329
                        percentileValue = (upstreams[upstream]["times"][low] + upstreams[upstream]["times"][high]) / 2
330
                    else:
331
                        percentileValue = upstreams[upstream]["times"][int(percentileKey)]
332
                print "us%s_times_percentile_%s.value %s" % (sanitize(upstream), percentile, percentileValue)
333
            print ""
334
335
    # HTTP Status codes graph data
336
    if "http" in graphs_enabled:
337
        for upstream in upstreams.keys():
338
            print ""
339
            print "multigraph nginx_upstream_multi_%s.%s_statuses" % (sanitize(siteName), sanitize(upstream))
340
            keylist = httpStatusList.keys()
341
            keylist.sort()
342
            for status in keylist:
343
                value = 0
344
                if timeElapsed > 0:
345
                    value = upstreams[upstream]["http"][status]["requests"] / timeElapsed
346
347
                print "http%s_%s_status.value %s" % (status, sanitize(upstream), value)
348
            print ""
349
350
    # Cache status graph data
351
    if "cache" in graphs_enabled:
352
        for upstream in upstreams.keys():
353
            print ""
354
            print "multigraph nginx_upstream_multi_%s.%s_cache" % (sanitize(siteName), sanitize(upstream))
355
            for status in cacheStatusList:
356
                value = 0
357
                if timeElapsed > 0:
358
                    value = upstreams[upstream]["cache"][status] / timeElapsed
359
360
                print "us%s_%s_cache.value %s" % (sanitize(status), sanitize(upstream), value)
361
            print ""