Projet

Général

Profil

Paste
Télécharger au format
Statistiques
| Branche: | Révision:

root / plugins / nginx / nginx_upstream_multi_ @ 5f9e882b

Historique | Voir | Annoter | Télécharger (14 ko)

1
#!/usr/bin/env python3
2
#
3
# Munin plugin to monitor requests number, cache statuses, http status codes and average request
4
# times of specified nginx upstreams.
5
#
6
# Copyright Igor Borodikhin
7
#
8
# License : GPLv3
9
#
10
# Configuration parameters:
11
# env.graphs - which graphs to produce (optional, list of graphs separated by spaces, default -
12
#              cache http time request)
13
# env.log - log file path (mandatory, ex.: /var/log/nginx/upstream.log)
14
# env.upstream - list of upstreams to monitor (mandatory, including port numbers separated by
15
#                space, e.g.: 10.0.0.1:80 10.0.0.2:8080)
16
# env.statuses - list of http status codes to monitor (optional, default - all statuses,
17
#                e.g.: 200 403 404 410 500 502)
18
# env.percentiles - which percentiles to draw on time graphs (optional, list of percentiles
19
#                   separated by spaces, default - 80)
20
#
21
# ## Installation
22
# Copy file to directory /usr/share/munin/plugins/ and create symbolic link(s) for each log file
23
# you wish to monitor.
24
#
25
# Specify log_format at /etc/nginx/conf.d/upstream.conf:
26
# log_format upstream "ua=[$upstream_addr] ut=[$upstream_response_time] us=[$upstream_status] \
27
#     cs=[$upstream_cache_status]"
28
#
29
# Use it in your site configuration (/etc/nginx/sites-enabled/anything.conf):
30
# access_log /var/log/nginx/upstream.log upstream;
31
#
32
# Attention! Since the default user (nobody) does not have read permission for nginx log files we
33
# need to run it as root.
34
#
35
# And specify some options in /etc/munin/plugin-conf.d/munin-node:
36
#
37
#     [nginx_upstream_multi_upstream]
38
#     user root
39
#     env.graphs cache http time request
40
#     env.log /var/log/nginx/upstream.log
41
#     env.upstream 10.0.0.1:80 10.0.0.2:8080 unix:/tmp/upstream3
42
#     env.statuses 200 403 404 410 500 502
43
#     env.percentiles 50 80
44
#
45
#  #%# family=contrib
46

    
47
import copy
48
import math
49
import os
50
import re
51
import sys
52
from time import time
53

    
54

    
55
# Plugin invocation name: the basename of argv[0].  Munin plugins are
# symlinked, so this name also carries the site name suffix.
progName = sys.argv[0][sys.argv[0].rfind("/") + 1:]

# Directory where munin lets plugins persist state between runs, if provided.
stateDir = os.environ.get("MUNIN_PLUGSTATE")
65

    
66
# The site name is whatever follows the plugin's own prefix in the symlink
# name this script was invoked through.
siteName = progName[len("nginx_upstream_multi_"):]

# Path of the nginx access log to parse (env.log, with a conventional default).
logPath = os.environ.get("log", "/var/log/nginx/access.log")
74

    
75
# Table of known HTTP status codes and their human-readable titles, encoded
# compactly as "code:title" pairs separated by ";".
httpStatusString = (
    "100:Continue;101:Switching protocols;102:Processing;200:OK;201:Created;202:Accepted;"
    "203:Non-Authoritative Information;204:No content;205:Reset content;206:Partial content;"
    "207:Multi-status;226:IM used;300:Multiple choices;301:Moved permanently;"
    "302:Moved temporarily;303:See other;304:Not modified;305:Use proxy;307:Temporary redirect;"
    "400:Bad request;401:Unauthorized;402:Payment required;403:Forbidden;404:Not found;"
    "405:Method not allowed;406:Not acceptable;407:Proxy Authentication Required;"
    "408:Request timeout;409:Conflict;410:Gone;411:Length required;412:Precondition failed;"
    "413:Request entity too large;414:Request URI too large;415:Unsupported media type;"
    "416:Request range not satisfiable;417:Expectation failed;422:Unprocessable entity;"
    "423:Locked;424:Failed dependency;425:Unordered collection;426:Upgrade required;"
    "449:Retry with;456:Unrecoverable error;500:Internal server error;501:Not implemented;"
    "502:Bad gateway;503:Service unavailable;504:Gateway timeout;505:HTTP version not supported;"
    "506:Variant also negotiates;507:Insufficient storage;508:Loop detected;"
    "509:Bandwidth limit exceeded;510:Not extended")

# Optional env.statuses restricts which codes are monitored; an empty list
# (variable unset) means "monitor all known codes".
statuses = os.environ.get("statuses", "").split()

# code -> {"title": ..., "requests": 0} for every monitored status code.
httpStatusList = {
    code: {"title": title, "requests": 0}
    for code, title in (entry.split(":") for entry in httpStatusString.split(";"))
    if not statuses or code in statuses
}

# Per-upstream cache status counters, all starting at zero.
cacheStatusList = dict.fromkeys(["MISS", "BYPASS", "EXPIRED", "UPDATING", "STALE", "HIT"], 0)
107

    
108
# Build the per-upstream counter structures from the mandatory env.upstream
# list (space-separated addresses, e.g. "10.0.0.1:80 unix:/tmp/upstream3").
# Each upstream gets its own deep copy of the cache/http counter templates.
if "upstream" not in os.environ:
    raise Exception("No upstreams specified")

upstreams = {
    name: {
        "requests": 0,                           # total requests seen
        "time": 0,                               # summed response time
        "times": [],                             # individual times (percentiles)
        "cache": copy.deepcopy(cacheStatusList),
        "http": copy.deepcopy(httpStatusList),
    }
    for name in os.environ["upstream"].split()
}
123

    
124
# Which percentiles to draw on the time graphs.  Values from the environment
# are strings, the default is an int; both go through int() before use.
percentiles = os.environ["percentiles"].split() if "percentiles" in os.environ else [80]

# Which graph families to emit; defaults to all four.
graphs_enabled = os.environ.get("graphs", "cache http time request").split()
133

    
134
# Current timestamp, used for rate computation below.
now = int(time())

# The state file stores the byte offset reached in the log on the previous
# run; its mtime doubles as the timestamp of that run.
lastBytePath = "%s/nginx_upstream_multi_%s_lastByte.txt" % (stateDir, siteName)
try:
    lastRun = os.path.getmtime(lastBytePath)
except OSError:
    # First run (or state file missing): treat the last run as "now" so the
    # elapsed time is 0 and no bogus rates are reported.
    lastRun = now
141

    
142

    
143
def sanitize(string):
    """Make *string* safe for use in a munin field name.

    Each of the characters ".", ":", "/" and "-" is replaced by "_".
    """
    return string.translate(str.maketrans(".:/-", "____"))
145

    
146

    
147
# Munin calls the plugin with "config" to obtain graph/field declarations;
# any other invocation is a fetch run: parse the portion of the log written
# since the previous run and print the sampled values.
if len(sys.argv) == 2 and sys.argv[1] == "config":
    # Parent graph declaration
    print("multigraph nginx_upstream_multi_%s" % siteName.replace(".", "_"))
    print("graph_title Requests number")
    print("graph_vlabel rps")
    print("graph_category webserver")
    for upstream in upstreams.keys():
        print("us%s_requests.label %s" % (sanitize(upstream), upstream))

    # Requests graph declaration
    if "request" in graphs_enabled:
        for upstream in upstreams.keys():
            print()
            print("multigraph nginx_upstream_multi_%s.%s_requests"
                  % (sanitize(siteName), sanitize(upstream)))
            print("graph_title Requests number - %s" % upstream)
            print("graph_vlabel rps")
            print("graph_category webserver")
            print("us%s_requests.label %s" % (sanitize(upstream), upstream))
            print()

    # Times graph declaration
    if "time" in graphs_enabled:
        for upstream in upstreams.keys():
            print()
            print("multigraph nginx_upstream_multi_%s.%s_times"
                  % (sanitize(siteName), sanitize(upstream)))
            print("graph_title Request time - %s" % upstream)
            print("graph_vlabel sec.")
            print("graph_category webserver")
            print("us%s_times.label average" % (sanitize(upstream)))
            for percentile in percentiles:
                print("us%s_times_percentile_%s.label %s-percentile"
                      % (sanitize(upstream), percentile, percentile))
            print()

    # HTTP Status codes graph declaration
    if "http" in graphs_enabled:
        for upstream in upstreams.keys():
            print()
            print("multigraph nginx_upstream_multi_%s.%s_statuses"
                  % (sanitize(siteName), sanitize(upstream)))
            print("graph_title HTTP - %s" % upstream)
            print("graph_vlabel rps")
            print("graph_category webserver")
            for status in sorted(httpStatusList.keys()):
                print("http%s_%s_status.label %s - %s"
                      % (status, sanitize(upstream), status, httpStatusList[status]["title"]))
            print()

    # Cache status graph declaration
    if "cache" in graphs_enabled:
        for upstream in upstreams.keys():
            print()
            print("multigraph nginx_upstream_multi_%s.%s_cache"
                  % (sanitize(siteName), sanitize(upstream)))
            print("graph_title Cache - %s" % upstream)
            print("graph_vlabel rps")
            print("graph_category webserver")
            for status in cacheStatusList:
                print("us%s_%s_cache.label %s" % (sanitize(status), sanitize(upstream), status))
            print()
else:
    # Fetch mode.  Seconds since the previous run (0 on the very first run,
    # because lastRun then defaults to now).
    timeElapsed = now - lastRun

    lastByteHandle = None

    # Resume offset: byte position in the log reached by the previous run.
    # Any failure (missing/corrupt state file) restarts from offset 0.
    try:
        lastByteHandle = open(lastBytePath, "r")
        lastByte = int(lastByteHandle.read())
    except Exception:
        lastByte = 0

    if lastByteHandle is not None:
        lastByteHandle.close()

    try:
        logHandle = open(logPath, "r")
    except Exception as e:
        print("Log file %s not readable: %s" % (logPath, e.strerror), file=sys.stderr)
        sys.exit(1)

    # NOTE(review): os.path.getsize raises OSError, not ValueError, so this
    # handler looks ineffective; in practice the log was just opened
    # successfully above, so getsize should not fail — confirm.
    try:
        logSize = int(os.path.getsize(logPath))
    except ValueError:
        logSize = 0

    # Log rotation/truncation: if the file shrank below the saved offset,
    # start reading from the beginning again.
    if logSize < lastByte:
        lastByte = 0

    # Matches the log_format documented in the file header:
    # ua=[$upstream_addr] ut=[$upstream_response_time] us=[$upstream_status]
    # cs=[$upstream_cache_status]
    regExp = re.compile(r"ua=\[(.*?)\]\s+ut=\[(.*?)\]\s+us=\[(.*?)\]\s+cs=\[(.*?)\]")

    logHandle.seek(lastByte)
    for line in logHandle:
        match = regExp.search(line)
        if (match):
            # Extract data
            address = match.group(1)
            # NOTE: this rebinds the time() function imported at the top of
            # the file; safe only because time() is not called again below.
            time = match.group(2)
            status = match.group(3)
            cache = match.group(4)

            # Replace separators by space.  nginx separates multiple
            # upstream attempts with "," and " : " inside these fields.
            address = address.replace(",", " ")
            address = address.replace(" : ", " ")
            address = re.sub(r"\s+", " ", address)

            time = time.replace(",", " ")
            time = time.replace(" : ", " ")
            time = re.sub(r"\s+", " ", time)

            status = status.replace(",", " ")
            status = status.replace(" : ", " ")
            status = re.sub(r"\s+", " ", status)

            cache = cache.replace(",", " ")
            cache = cache.replace(" : ", " ")
            cache = re.sub(r"\s+", " ", cache)

            # Parallel per-attempt lists; entry i of each list describes the
            # same upstream attempt.
            addresses = address.split()
            times = time.split()
            # NOTE(review): rebinds the module-level "statuses" env list;
            # harmless here since that list is no longer used after
            # httpStatusList was built — confirm.
            statuses = status.split()
            caches = cache.split()

            index = 0
            for uAddress in addresses:
                # Only count attempts against upstreams we were asked to
                # monitor; unknown addresses still advance the index.
                if uAddress in upstreams.keys():
                    # "-" (no time recorded) fails float() and becomes 0.
                    try:
                        uTime = float(times[index])
                    except ValueError:
                        uTime = 0

                    if index < len(statuses):
                        uStatus = statuses[index]
                    else:
                        uStatus = "-"

                    if index < len(caches):
                        uCache = caches[index]
                    else:
                        uCache = "-"

                    if uAddress != "-":
                        upstreams[uAddress]["requests"] += 1
                    # NOTE(review): uTime is always a float here, so this
                    # test is always true; the "-" case was already folded
                    # to 0 by the ValueError handler above.
                    if uTime != "-":
                        upstreams[uAddress]["time"] += uTime
                        upstreams[uAddress]["times"].append(uTime)
                    if uStatus != "-" and uStatus in upstreams[uAddress]["http"].keys():
                        upstreams[uAddress]["http"][uStatus]["requests"] += 1
                    # NOTE(review): a cache status outside the six keys of
                    # cacheStatusList (e.g. REVALIDATED) would raise
                    # KeyError here — confirm against the nginx version.
                    if uCache != "-":
                        upstreams[uAddress]["cache"][uCache] += 1
                index += 1

    # Persist the new resume offset for the next run.
    try:
        lastByteHandle = open(lastBytePath, "w")
        lastByteHandle.write(str(logHandle.tell()))
        lastByteHandle.close()
    except Exception as e:
        # NOTE(review): e.strerror exists on OSError; other exception types
        # would lack it — confirm this path only sees I/O errors.
        print("Failed to write status file (%s): %s" % (lastBytePath, e.strerror), file=sys.stderr)
        sys.exit(1)

    logHandle.close()

    # Parent graph data
    for upstream in upstreams.keys():
        value = 0
        if timeElapsed > 0:
            value = upstreams[upstream]["requests"] / timeElapsed

        print("us%s_requests.value %s" % (sanitize(upstream), value))

    # Requests graph data
    if "request" in graphs_enabled:
        for upstream in upstreams.keys():
            print()
            print("multigraph nginx_upstream_multi_%s.%s_requests"
                  % (sanitize(siteName), sanitize(upstream)))
            value = 0
            if timeElapsed > 0:
                value = upstreams[upstream]["requests"] / timeElapsed
            print("us%s_requests.value %s" % (sanitize(upstream), value))
            print()

    # Times graph data
    if "time" in graphs_enabled:
        for upstream in upstreams.keys():
            # Average response time over the sampled interval.
            uTime = 0
            if upstreams[upstream]["requests"] > 0:
                uTime = upstreams[upstream]["time"] / upstreams[upstream]["requests"]
                upstreams[upstream]["times"].sort()
            print()
            print("multigraph nginx_upstream_multi_%s.%s_times"
                  % (sanitize(siteName), sanitize(upstream)))
            print("us%s_times.value %s" % (sanitize(upstream), uTime))
            for percentile in percentiles:
                percentileValue = 0
                if upstreams[upstream]["requests"] > 0:
                    uTime = upstreams[upstream]["time"] / upstreams[upstream]["requests"]
                    # Fractional rank into the sorted times list.
                    # NOTE(review): the parity check below interpolates only
                    # when the list length is odd, and for percentile=100
                    # the index can reach len(times) and raise IndexError —
                    # verify against the intended percentile definition.
                    percentileKey = int(percentile) * len(upstreams[upstream]["times"]) / 100
                    if len(upstreams[upstream]["times"]) % 2 > 0:
                        low = int(math.floor(percentileKey))
                        high = int(math.ceil(percentileKey))
                        percentileValue = (upstreams[upstream]["times"][low]
                                           + upstreams[upstream]["times"][high]) / 2
                    else:
                        percentileValue = upstreams[upstream]["times"][int(percentileKey)]
                print("us%s_times_percentile_%s.value %s"
                      % (sanitize(upstream), percentile, percentileValue))
            print()

    # HTTP Status codes graph data
    if "http" in graphs_enabled:
        for upstream in upstreams.keys():
            print()
            print("multigraph nginx_upstream_multi_%s.%s_statuses"
                  % (sanitize(siteName), sanitize(upstream)))
            for status in sorted(httpStatusList.keys()):
                value = 0
                if timeElapsed > 0:
                    value = upstreams[upstream]["http"][status]["requests"] / timeElapsed

                print("http%s_%s_status.value %s" % (status, sanitize(upstream), value))
            print()

    # Cache status graph data
    if "cache" in graphs_enabled:
        for upstream in upstreams.keys():
            print()
            print("multigraph nginx_upstream_multi_%s.%s_cache"
                  % (sanitize(siteName), sanitize(upstream)))
            for status in cacheStatusList:
                value = 0
                if timeElapsed > 0:
                    value = upstreams[upstream]["cache"][status] / timeElapsed

                print("us%s_%s_cache.value %s" % (sanitize(status), sanitize(upstream), value))
            print()