Projet

Général

Profil

Paste
Télécharger au format
Statistiques
| Branche: | Révision:

root / plugins / nginx / nginx_upstream_multi_ @ a7139bca

Historique | Voir | Annoter | Télécharger (13,9 ko)

1
#!/usr/bin/env python3
2
#
3
# Munin plugin to monitor requests number, cache statuses, http status codes and average request
4
# times of specified nginx upstreams.
5
#
6
# Copyright Igor Borodikhin
7
#
8
# License : GPLv3
9
#
10
# Configuration parameters:
11
# env.graphs - which graphs to produce (optional, list of graphs separated by spaces, default -
12
#              cache http time request)
13
# env.log - log file path (mandatory, ex.: /var/log/nginx/upstream.log)
14
# env.upstream - list of upstreams to monitor (mandatory, including port numbers separated by
15
#                space, e.g.: 10.0.0.1:80 10.0.0.2:8080)
16
# env.statuses - list of http status codes to monitor (optional, default - all statuses,
17
#                e.g.: 200 403 404 410 500 502)
18
# env.percentiles - which percentiles to draw on time graphs (optional, list of percentiles
19
#                   separated by spaces, default - 80)
20
#
21
# ## Installation
22
# Copy file to directory /usr/share/munin/plugins/ and create symbolic link(s) for each log file
23
# you wish to monitor.
24
#
25
# Specify log_format at /etc/nginx/conf.d/upstream.conf:
26
# log_format upstream "ua=[$upstream_addr] ut=[$upstream_response_time] us=[$upstream_status] \
27
#     cs=[$upstream_cache_status]"
28
#
29
# Use it in your site configuration (/etc/nginx/sites-enabled/anything.conf):
30
# access_log /var/log/nginx/upstream.log upstream;
31
#
32
# Attention! Since the default user (nobody) does not have read permission for nginx log files we
33
# need to run it as root.
34
#
35
# And specify some options in /etc/munin/plugin-conf.d/munin-node:
36
#
37
#     [nginx_upstream_multi_upstream]
38
#     user root
39
#     env.graphs cache http time request
40
#     env.log /var/log/nginx/upstream.log
41
#     env.upstream 10.0.0.1:80 10.0.0.2:8080 unix:/tmp/upstream3
42
#     env.statuses 200 403 404 410 500 502
43
#     env.percentiles 50 80
44
#
45
#  #%# family=contrib
46

    
47
import copy
48
import math
49
import os
50
import re
51
import sys
52
import time
53

    
54

    
55
# Name under which this plugin was invoked (basename of argv[0]); munin
# encodes the monitored site in the symlink name.
progName = sys.argv[0].rsplit("/", 1)[-1]

# Directory where plugin state is persisted (provided by munin-node).
# NOTE(review): stays None if MUNIN_PLUGSTATE is unset, which would make the
# later os.path.join on it raise TypeError — confirm munin-node always sets it.
stateDir = os.environ.get("MUNIN_PLUGSTATE")

# Site name is whatever follows the "nginx_upstream_multi_" prefix in the
# symlink name.
siteName = progName[len("nginx_upstream_multi_"):]

# Path of the nginx log file to parse (env.log).
logPath = os.environ.get("log", "/var/log/nginx/access.log")
68

    
69
# Known HTTP status codes and their titles, encoded as "code:title" entries
# joined by ";" (parsed into httpStatusList below).
httpStatusString = ";".join((
    "100:Continue", "101:Switching protocols", "102:Processing",
    "200:OK", "201:Created", "202:Accepted",
    "203:Non-Authoritative Information", "204:No content",
    "205:Reset content", "206:Partial content", "207:Multi-status",
    "226:IM used",
    "300:Multiple choices", "301:Moved permanently", "302:Moved temporarily",
    "303:See other", "304:Not modified", "305:Use proxy",
    "307:Temporary redirect",
    "400:Bad request", "401:Unauthorized", "402:Payment required",
    "403:Forbidden", "404:Not found", "405:Method not allowed",
    "406:Not acceptable", "407:Proxy Authentication Required",
    "408:Request timeout", "409:Conflict", "410:Gone",
    "411:Length required", "412:Precondition failed",
    "413:Request entity too large", "414:Request URI too large",
    "415:Unsupported media type", "416:Request range not satisfiable",
    "417:Expectation failed", "422:Unprocessable entity", "423:Locked",
    "424:Failed dependency", "425:Unordered collection",
    "426:Upgrade required", "449:Retry with", "456:Unrecoverable error",
    "500:Internal server error", "501:Not implemented", "502:Bad gateway",
    "503:Service unavailable", "504:Gateway timeout",
    "505:HTTP version not supported", "506:Variant also negotiates",
    "507:Insufficient storage", "508:Loop detected",
    "509:Bandwidth limit exceeded", "510:Not extended",
))
85

    
86
# Restrict monitoring to the HTTP status codes listed in env.statuses;
# an empty list is interpreted as "track every known status".
statuses = os.environ.get("statuses", "").split()

# Map each selected status code to its human-readable title plus a request
# counter (deep-copied per upstream later).
httpStatusList = {
    code: {"title": title, "requests": 0}
    for code, title in (entry.split(":") for entry in httpStatusString.split(";"))
    if not statuses or code in statuses
}

# Counters for every nginx $upstream_cache_status value we track.
cacheStatusList = {"MISS": 0, "BYPASS": 0, "EXPIRED": 0, "UPDATING": 0, "STALE": 0, "HIT": 0}
99

    
100
# Build one accumulator per upstream named in the mandatory env.upstream list
# (space-separated, e.g. "10.0.0.1:80 unix:/tmp/upstream3").
upstreams = {}
if "upstream" not in os.environ:
    raise Exception("No upstreams specified")
upstreams = {
    name: {
        "requests": 0,               # total requests routed to this upstream
        "time": 0,                   # summed response time
        "times": [],                 # individual times, for percentiles
        "cache": copy.deepcopy(cacheStatusList),
        "http": copy.deepcopy(httpStatusList),
    }
    for name in os.environ["upstream"].split()
}
115

    
116
# Percentiles to draw on the time graphs (env.percentiles, default: 80th).
percentiles = os.environ.get("percentiles", "80").split()

# Which graph families to emit (env.graphs).
graphs_enabled = os.environ.get("graphs", "cache http time request").split()

# Timestamp of this run; counters are divided by elapsed time to get rates.
now = int(time.time())

# State file remembering how far into the log the previous run read.
# NOTE(review): raises TypeError here if stateDir is None (MUNIN_PLUGSTATE
# unset) — confirm the munin-node environment guarantees it.
lastBytePath = os.path.join(stateDir, "nginx_upstream_multi_%s_lastByte.txt" % siteName)
try:
    lastRun = os.path.getmtime(lastBytePath)
except OSError:
    # First run (no state file yet): treat it as "no time elapsed".
    lastRun = now
127

    
128

    
129
def sanitize(string):
    """Make a string safe for use as part of a munin field name.

    Dots, colons, slashes and dashes are all mapped to underscores.
    """
    return re.sub(r"[.:/-]", "_", string)
131

    
132

    
133
# Main dispatch: "config" argument prints the munin graph declarations,
# anything else performs a fetch run — parse the new portion of the log,
# update the byte-offset state file, and print the per-upstream values.
if len(sys.argv) == 2 and sys.argv[1] == "config":
    # Parent graph declaration
    print("multigraph nginx_upstream_multi_%s" % siteName.replace(".", "_"))
    print("graph_title Requests number")
    print("graph_vlabel rps")
    print("graph_category webserver")
    for upstream in upstreams.keys():
        print("us%s_requests.label %s" % (sanitize(upstream), upstream))

    # Requests graph declaration
    if "request" in graphs_enabled:
        for upstream in upstreams.keys():
            print()
            print("multigraph nginx_upstream_multi_%s.%s_requests"
                  % (sanitize(siteName), sanitize(upstream)))
            print("graph_title Requests number - %s" % upstream)
            print("graph_vlabel rps")
            print("graph_category webserver")
            print("us%s_requests.label %s" % (sanitize(upstream), upstream))
            print()

    # Times graph declaration: one "average" field plus one field per
    # configured percentile.
    if "time" in graphs_enabled:
        for upstream in upstreams.keys():
            print()
            print("multigraph nginx_upstream_multi_%s.%s_times"
                  % (sanitize(siteName), sanitize(upstream)))
            print("graph_title Request time - %s" % upstream)
            print("graph_vlabel sec.")
            print("graph_category webserver")
            print("us%s_times.label average" % (sanitize(upstream)))
            for percentile in percentiles:
                print("us%s_times_percentile_%s.label %s-percentile"
                      % (sanitize(upstream), percentile, percentile))
            print()

    # HTTP Status codes graph declaration
    if "http" in graphs_enabled:
        for upstream in upstreams.keys():
            print()
            print("multigraph nginx_upstream_multi_%s.%s_statuses"
                  % (sanitize(siteName), sanitize(upstream)))
            print("graph_title HTTP - %s" % upstream)
            print("graph_vlabel rps")
            print("graph_category webserver")
            for status in sorted(httpStatusList.keys()):
                print("http%s_%s_status.label %s - %s"
                      % (status, sanitize(upstream), status, httpStatusList[status]["title"]))
            print()

    # Cache status graph declaration
    if "cache" in graphs_enabled:
        for upstream in upstreams.keys():
            print()
            print("multigraph nginx_upstream_multi_%s.%s_cache"
                  % (sanitize(siteName), sanitize(upstream)))
            print("graph_title Cache - %s" % upstream)
            print("graph_vlabel rps")
            print("graph_category webserver")
            for status in cacheStatusList:
                print("us%s_%s_cache.label %s" % (sanitize(status), sanitize(upstream), status))
            print()
else:
    # Fetch run. timeElapsed is the seconds since the state file's mtime;
    # 0 on the first run (lastRun defaults to now), in which case all rates
    # are reported as 0.
    timeElapsed = now - lastRun

    lastByteHandle = None

    # Byte offset where the previous run stopped reading the log; any
    # failure (missing file, bad content) falls back to reading from 0.
    try:
        lastByteHandle = open(lastBytePath, "r")
        lastByte = int(lastByteHandle.read())
    except Exception:
        lastByte = 0

    if lastByteHandle is not None:
        lastByteHandle.close()

    try:
        logHandle = open(logPath, "r")
    except Exception as e:
        print("Log file %s not readable: %s" % (logPath, e.strerror), file=sys.stderr)
        sys.exit(1)

    # NOTE(review): os.path.getsize raises OSError, not ValueError — a
    # missing/unreadable log would propagate here rather than set logSize=0.
    try:
        logSize = int(os.path.getsize(logPath))
    except ValueError:
        logSize = 0

    # Log rotation detection: if the file shrank, start over from the top.
    if logSize < lastByte:
        lastByte = 0

    # Matches the documented log_format:
    # ua=[$upstream_addr] ut=[$upstream_response_time] us=[$upstream_status] cs=[$upstream_cache_status]
    regExp = re.compile(r"ua=\[(.*?)\]\s+ut=\[(.*?)\]\s+us=\[(.*?)\]\s+cs=\[(.*?)\]")

    logHandle.seek(lastByte)
    for line in logHandle:
        match = regExp.search(line)
        if (match):
            # Extract data
            address = match.group(1)
            request_time = match.group(2)
            status = match.group(3)
            cache = match.group(4)

            # Replace separators by space: nginx joins multiple upstream
            # attempts with "," (and " : " between server groups), so each
            # field becomes a space-separated list with aligned positions.
            address = address.replace(",", " ")
            address = address.replace(" : ", " ")
            address = re.sub(r"\s+", " ", address)

            request_time = request_time.replace(",", " ")
            request_time = request_time.replace(" : ", " ")
            request_time = re.sub(r"\s+", " ", request_time)

            status = status.replace(",", " ")
            status = status.replace(" : ", " ")
            status = re.sub(r"\s+", " ", status)

            cache = cache.replace(",", " ")
            cache = cache.replace(" : ", " ")
            cache = re.sub(r"\s+", " ", cache)

            addresses = address.split()
            times = request_time.split()
            # NOTE(review): this rebinds the module-level `statuses` config
            # list (env.statuses) on every matched line; harmless only
            # because httpStatusList was already built above.
            statuses = status.split()
            caches = cache.split()

            # Walk the per-attempt lists in lockstep by position.
            index = 0
            for uAddress in addresses:
                if uAddress in upstreams.keys():
                    try:
                        uTime = float(times[index])
                    except ValueError:
                        # Non-numeric time (e.g. "-") counts as 0.
                        uTime = 0

                    if index < len(statuses):
                        uStatus = statuses[index]
                    else:
                        uStatus = "-"

                    if index < len(caches):
                        uCache = caches[index]
                    else:
                        uCache = "-"

                    if uAddress != "-":
                        upstreams[uAddress]["requests"] += 1
                    # NOTE(review): uTime is always a float here (never "-"),
                    # so this condition is effectively always true and 0 is
                    # appended for unparsable times.
                    if uTime != "-":
                        upstreams[uAddress]["time"] += uTime
                        upstreams[uAddress]["times"].append(uTime)
                    if uStatus != "-" and uStatus in upstreams[uAddress]["http"].keys():
                        upstreams[uAddress]["http"][uStatus]["requests"] += 1
                    # NOTE(review): a cache status outside the six tracked
                    # keys (e.g. REVALIDATED) would raise KeyError here.
                    if uCache != "-":
                        upstreams[uAddress]["cache"][uCache] += 1
                index += 1

    # Persist the current read position for the next run.
    try:
        lastByteHandle = open(lastBytePath, "w")
        lastByteHandle.write(str(logHandle.tell()))
        lastByteHandle.close()
    except Exception as e:
        print("Failed to write status file (%s): %s" % (lastBytePath, e.strerror), file=sys.stderr)
        sys.exit(1)

    logHandle.close()

    # Parent graph data
    for upstream in upstreams.keys():
        value = 0
        if timeElapsed > 0:
            value = upstreams[upstream]["requests"] / timeElapsed

        print("us%s_requests.value %s" % (sanitize(upstream), value))

    # Requests graph data
    if "request" in graphs_enabled:
        for upstream in upstreams.keys():
            print()
            print("multigraph nginx_upstream_multi_%s.%s_requests"
                  % (sanitize(siteName), sanitize(upstream)))
            value = 0
            if timeElapsed > 0:
                value = upstreams[upstream]["requests"] / timeElapsed
            print("us%s_requests.value %s" % (sanitize(upstream), value))
            print()

    # Times graph data: average plus interpolated percentiles over the
    # sorted per-request times.
    if "time" in graphs_enabled:
        for upstream in upstreams.keys():
            uTime = 0
            if upstreams[upstream]["requests"] > 0:
                uTime = upstreams[upstream]["time"] / upstreams[upstream]["requests"]
                upstreams[upstream]["times"].sort()
            print()
            print("multigraph nginx_upstream_multi_%s.%s_times"
                  % (sanitize(siteName), sanitize(upstream)))
            print("us%s_times.value %s" % (sanitize(upstream), uTime))
            for percentile in percentiles:
                percentileValue = 0
                if upstreams[upstream]["requests"] > 0:
                    uTime = upstreams[upstream]["time"] / upstreams[upstream]["requests"]
                    # Fractional rank into the sorted times list.
                    percentileKey = int(percentile) * len(upstreams[upstream]["times"]) / 100
                    # NOTE(review): parity of the list length picks between
                    # averaging floor/ceil neighbours and direct indexing;
                    # for percentile >= 100 the computed index can equal
                    # len(times) and raise IndexError — confirm intent.
                    if len(upstreams[upstream]["times"]) % 2 > 0:
                        low = int(math.floor(percentileKey))
                        high = int(math.ceil(percentileKey))
                        percentileValue = (upstreams[upstream]["times"][low]
                                           + upstreams[upstream]["times"][high]) / 2
                    else:
                        percentileValue = upstreams[upstream]["times"][int(percentileKey)]
                print("us%s_times_percentile_%s.value %s"
                      % (sanitize(upstream), percentile, percentileValue))
            print()

    # HTTP Status codes graph data
    if "http" in graphs_enabled:
        for upstream in upstreams.keys():
            print()
            print("multigraph nginx_upstream_multi_%s.%s_statuses"
                  % (sanitize(siteName), sanitize(upstream)))
            for status in sorted(httpStatusList.keys()):
                value = 0
                if timeElapsed > 0:
                    value = upstreams[upstream]["http"][status]["requests"] / timeElapsed

                print("http%s_%s_status.value %s" % (status, sanitize(upstream), value))
            print()

    # Cache status graph data
    if "cache" in graphs_enabled:
        for upstream in upstreams.keys():
            print()
            print("multigraph nginx_upstream_multi_%s.%s_cache"
                  % (sanitize(siteName), sanitize(upstream)))
            for status in cacheStatusList:
                value = 0
                if timeElapsed > 0:
                    value = upstreams[upstream]["cache"][status] / timeElapsed

                print("us%s_%s_cache.value %s" % (sanitize(status), sanitize(upstream), value))
            print()