Projet

Général

Profil

Paste
Télécharger au format
Statistiques
| Branche: | Révision:

root / plugins / nginx / nginx_upstream_multi_ @ a7139bca

Historique | Voir | Annoter | Télécharger (13,9 ko)

1 13bd1599 Lars Kruse
#!/usr/bin/env python3
2 6c765698 majesty
#
3 5f9e882b Lars Kruse
# Munin plugin to monitor requests number, cache statuses, http status codes and average request
4
# times of specified nginx upstreams.
5 6c765698 majesty
#
6
# Copyright Igor Borodikhin
7
#
8
# License : GPLv3
9
#
10
# Configuration parameters:
11 5f9e882b Lars Kruse
# env.graphs - which graphs to produce (optional, list of graphs separated by spaces, default -
12
#              cache http time request)
13 6c765698 majesty
# env.log - log file path (mandatory, ex.: /var/log/nginx/upstream.log)
14 5f9e882b Lars Kruse
# env.upstream - list of upstreams to monitor (mandatory, including port numbers separated by
15
#                space, e.g.: 10.0.0.1:80 10.0.0.2:8080)
16
# env.statuses - list of http status codes to monitor (optional, default - all statuses,
17
#                e.g.: 200 403 404 410 500 502)
18
# env.percentiles - which percentiles to draw on time graphs (optional, list of percentiles
19
#                   separated by spaces, default - 80)
20 6c765698 majesty
#
21
# ## Installation
22 5f9e882b Lars Kruse
# Copy file to directory /usr/share/munin/plugins/ and create symbolic link(s) for each log file
23
# you wish to monitor.
24 6c765698 majesty
#
25
# Specify log_format at /etc/nginx/conf.d/upstream.conf:
26 5f9e882b Lars Kruse
# log_format upstream "ua=[$upstream_addr] ut=[$upstream_response_time] us=[$upstream_status] \
27
#     cs=[$upstream_cache_status]"
28 6c765698 majesty
#
29
# Use it in your site configuration (/etc/nginx/sites-enabled/anything.conf):
30
# access_log /var/log/nginx/upstream.log upstream;
31
#
32 5f9e882b Lars Kruse
# Attention! Since the default user (nobody) does not have read permission for nginx log files we
33
# need to run it as root.
34 6efaef76 Igor Borodikhin
#
35 2eb7552c Igor Borodikhin
# And specify some options in /etc/munin/plugin-conf.d/munin-node:
36 6c765698 majesty
#
37
#     [nginx_upstream_multi_upstream]
38 6efaef76 Igor Borodikhin
#     user root
39 6c765698 majesty
#     env.graphs cache http time request
40
#     env.log /var/log/nginx/upstream.log
41
#     env.upstream 10.0.0.1:80 10.0.0.2:8080 unix:/tmp/upstream3
42
#     env.statuses 200 403 404 410 500 502
43
#     env.percentiles 50 80
44
#
45 5f9e882b Lars Kruse
#  #%# family=contrib
46 6c765698 majesty
47 13bd1599 Lars Kruse
import copy
48
import math
49
import os
50
import re
51
import sys
52 734da6b9 Lars Kruse
import time
53 6c765698 majesty
54 13bd1599 Lars Kruse
55 6c765698 majesty
# How we've been called
56
progName = sys.argv[0]
57 7063330e Lars Kruse
progName = progName[progName.rfind("/") + 1:]
58 6c765698 majesty
59 13bd1599 Lars Kruse
60 6c765698 majesty
# Where to store plugin state
61 d5fc30a9 Lars Kruse
stateDir = os.environ.get("MUNIN_PLUGSTATE", None)
62 6c765698 majesty
63
# Which site configuration we should use
64
siteName = progName[len("nginx_upstream_multi_"):]
65
66
# Log path
67 d5fc30a9 Lars Kruse
logPath = os.environ.get("log", "/var/log/nginx/access.log")
68 6c765698 majesty
69
# Http statuses list
70 5f9e882b Lars Kruse
httpStatusString = (
71
    "100:Continue;101:Switching protocols;102:Processing;200:OK;201:Created;202:Accepted;"
72
    "203:Non-Authoritative Information;204:No content;205:Reset content;206:Partial content;"
73
    "207:Multi-status;226:IM used;300:Multiple choices;301:Moved permanently;"
74
    "302:Moved temporarily;303:See other;304:Not modified;305:Use proxy;307:Temporary redirect;"
75
    "400:Bad request;401:Unauthorized;402:Payment required;403:Forbidden;404:Not found;"
76
    "405:Method not allowed;406:Not acceptable;407:Proxy Authentication Required;"
77
    "408:Request timeout;409:Conflict;410:Gone;411:Length required;412:Precondition failed;"
78 36748826 Lars Kruse
    "413:Request entity too large;414:Request URI too large;415:Unsupported media type;"
79 5f9e882b Lars Kruse
    "416:Request range not satisfiable;417:Expectation failed;422:Unprocessable entity;"
80
    "423:Locked;424:Failed dependency;425:Unordered collection;426:Upgrade required;"
81
    "449:Retry with;456:Unrecoverable error;500:Internal server error;501:Not implemented;"
82
    "502:Bad gateway;503:Service unavailable;504:Gateway timeout;505:HTTP version not supported;"
83
    "506:Variant also negotiates;507:Insufficient storage;508:Loop detected;"
84
    "509:Bandwidth limit exceeded;510:Not extended")
85 6c765698 majesty
86 d5fc30a9 Lars Kruse
# Wanted status codes; an empty list is interpreted as: all statuses
statuses = os.environ.get("statuses", "").split()

# Map every selected status code to its title and a zeroed request counter
httpStatusList = {}
for statusEntry in httpStatusString.split(";"):
    code, title = statusEntry.split(":")
    if statuses and code not in statuses:
        # A filter was configured and this code is not part of it
        continue
    httpStatusList[code] = {"title": title, "requests": 0}
97
98 5f9e882b Lars Kruse
cacheStatusList = {"MISS": 0, "BYPASS": 0, "EXPIRED": 0, "UPDATING": 0, "STALE": 0, "HIT": 0}
99 6c765698 majesty
100
# Parse upstreams: the whitespace separated "upstream" setting is mandatory
if "upstream" not in os.environ:
    raise Exception("No upstreams specified")

# Every configured upstream gets its own, independent set of zeroed counters
upstreams = {
    name: {
        "requests": 0,
        "time": 0,
        "times": [],
        "cache": copy.deepcopy(cacheStatusList),
        "http": copy.deepcopy(httpStatusList),
    }
    for name in os.environ["upstream"].split()
}
115
116 d5fc30a9 Lars Kruse
percentiles = os.environ.get("percentiles", "80").split()
117 6c765698 majesty
118 d5fc30a9 Lars Kruse
graphs_enabled = os.environ.get("graphs", "cache http time request").split()
119 6c765698 majesty
120 734da6b9 Lars Kruse
now = int(time.time())
121 6c765698 majesty
122 d5fc30a9 Lars Kruse
lastBytePath = os.path.join(stateDir, "nginx_upstream_multi_{}_lastByte.txt".format(siteName))
123 6c765698 majesty
try:
124
    lastRun = os.path.getmtime(lastBytePath)
125
except OSError:
126
    lastRun = now
127
128
129
def sanitize(string):
    """Return *string* with every character unsuitable for a Munin name replaced by "_".

    Only ASCII letters, digits and underscores are kept. The characters handled
    previously (".", ":", "/" and "-") still map to "_", so existing names do not
    change; other characters (e.g. the brackets of "[::1]:80") are now replaced
    as well instead of leaking into field and graph names.
    """
    return re.sub(r"[^A-Za-z0-9_]", "_", string)
131
132 5f9e882b Lars Kruse
133 6c765698 majesty
if sys.argv[1:] == ["config"]:
    # Called as "<plugin> config": describe the parent graph and every enabled
    # per-upstream sub-graph.
    siteId = sanitize(siteName)

    # Parent graph declaration
    print("multigraph nginx_upstream_multi_%s" % siteName.replace(".", "_"))
    for headerLine in ("graph_title Requests number", "graph_vlabel rps",
                       "graph_category webserver"):
        print(headerLine)
    for upstream in upstreams:
        print("us%s_requests.label %s" % (sanitize(upstream), upstream))

    # Requests graph declaration
    if "request" in graphs_enabled:
        for upstream in upstreams:
            fieldId = sanitize(upstream)
            print()
            print("multigraph nginx_upstream_multi_%s.%s_requests" % (siteId, fieldId))
            print("graph_title Requests number - %s" % upstream)
            print("graph_vlabel rps")
            print("graph_category webserver")
            print("us%s_requests.label %s" % (fieldId, upstream))
            print()

    # Times graph declaration
    if "time" in graphs_enabled:
        for upstream in upstreams:
            fieldId = sanitize(upstream)
            print()
            print("multigraph nginx_upstream_multi_%s.%s_times" % (siteId, fieldId))
            print("graph_title Request time - %s" % upstream)
            print("graph_vlabel sec.")
            print("graph_category webserver")
            print("us%s_times.label average" % (fieldId))
            # One extra line per configured percentile
            for percentile in percentiles:
                print("us%s_times_percentile_%s.label %s-percentile"
                      % (fieldId, percentile, percentile))
            print()

    # HTTP Status codes graph declaration
    if "http" in graphs_enabled:
        orderedStatuses = sorted(httpStatusList.keys())
        for upstream in upstreams:
            fieldId = sanitize(upstream)
            print()
            print("multigraph nginx_upstream_multi_%s.%s_statuses" % (siteId, fieldId))
            print("graph_title HTTP - %s" % upstream)
            print("graph_vlabel rps")
            print("graph_category webserver")
            for status in orderedStatuses:
                statusTitle = httpStatusList[status]["title"]
                print("http%s_%s_status.label %s - %s"
                      % (status, fieldId, status, statusTitle))
            print()

    # Cache status graph declaration
    if "cache" in graphs_enabled:
        for upstream in upstreams:
            fieldId = sanitize(upstream)
            print()
            print("multigraph nginx_upstream_multi_%s.%s_cache" % (siteId, fieldId))
            print("graph_title Cache - %s" % upstream)
            print("graph_vlabel rps")
            print("graph_category webserver")
            for status in cacheStatusList:
                print("us%s_%s_cache.label %s" % (sanitize(status), fieldId, status))
            print()
195 6c765698 majesty
else:
196
    timeElapsed = now - lastRun
197
198
    # Offset in the log file reached by the previous run. A missing, unreadable or
    # corrupt state file simply means: start from the beginning of the log.
    try:
        with open(lastBytePath, "r") as lastByteHandle:
            lastByte = int(lastByteHandle.read())
    except (OSError, ValueError):
        lastByte = 0

    try:
        logHandle = open(logPath, "r")
    except OSError as e:
        print("Log file %s not readable: %s" % (logPath, e.strerror), file=sys.stderr)
        sys.exit(1)

    regExp = re.compile(r"ua=\[(.*?)\]\s+ut=\[(.*?)\]\s+us=\[(.*?)\]\s+cs=\[(.*?)\]")

    # The "with" block guarantees the log is closed even if parsing fails
    with logHandle:
        # os.path.getsize() reports failures as OSError
        try:
            logSize = os.path.getsize(logPath)
        except OSError:
            logSize = 0

        # A log shorter than the stored offset was rotated or truncated, so it is
        # read from the start again. A negative offset can only come from a damaged
        # state file and would make seek() fail.
        if lastByte < 0 or logSize < lastByte:
            lastByte = 0

        logHandle.seek(lastByte)
        for line in logHandle:
            match = regExp.search(line)
            if not match:
                continue

            # Each captured field may hold several values separated by "," or " : ".
            # Turn the four fields into parallel lists (split() also collapses any
            # repeated whitespace left over by the replacements).
            addresses, times, statusCodes, caches = (
                field.replace(",", " ").replace(" : ", " ").split()
                for field in match.groups())

            for index, uAddress in enumerate(addresses):
                if uAddress not in upstreams:
                    continue
                stats = upstreams[uAddress]

                # A missing or non-numeric time (e.g. "-") is counted as 0, which
                # keeps the list of times in step with the request counter.
                try:
                    uTime = float(times[index])
                except (ValueError, IndexError):
                    uTime = 0

                uStatus = statusCodes[index] if index < len(statusCodes) else "-"
                uCache = caches[index] if index < len(caches) else "-"

                if uAddress != "-":
                    stats["requests"] += 1
                stats["time"] += uTime
                stats["times"].append(uTime)
                # Only count values that have a counter: "-" and status codes or
                # cache states which are not tracked are ignored instead of
                # raising KeyError.
                if uStatus in stats["http"]:
                    stats["http"][uStatus]["requests"] += 1
                if uCache in stats["cache"]:
                    stats["cache"][uCache] += 1

        # Position after the last line read; the next run continues from here
        logPosition = logHandle.tell()

    try:
        with open(lastBytePath, "w") as lastByteHandle:
            lastByteHandle.write(str(logPosition))
    except OSError as e:
        print("Failed to write status file (%s): %s" % (lastBytePath, e.strerror), file=sys.stderr)
        sys.exit(1)
295
296
    # Parent graph data: requests per second of every upstream since the last run
    for upstream in upstreams:
        value = upstreams[upstream]["requests"] / timeElapsed if timeElapsed > 0 else 0
        print("us%s_requests.value %s" % (sanitize(upstream), value))

    # Requests graph data: the same rate, reported once per upstream sub-graph
    if "request" in graphs_enabled:
        siteId = sanitize(siteName)
        for upstream in upstreams:
            fieldId = sanitize(upstream)
            if timeElapsed > 0:
                value = upstreams[upstream]["requests"] / timeElapsed
            else:
                value = 0
            print()
            print("multigraph nginx_upstream_multi_%s.%s_requests" % (siteId, fieldId))
            print("us%s_requests.value %s" % (fieldId, value))
            print()
315 6c765698 majesty
316
    # Times graph data: average request time and the configured percentiles
    if "time" in graphs_enabled:
        for upstream in upstreams.keys():
            stats = upstreams[upstream]
            sortedTimes = sorted(stats["times"])
            lastIndex = len(sortedTimes) - 1

            uTime = 0
            if stats["requests"] > 0:
                uTime = stats["time"] / stats["requests"]

            print()
            print("multigraph nginx_upstream_multi_%s.%s_times"
                  % (sanitize(siteName), sanitize(upstream)))
            print("us%s_times.value %s" % (sanitize(upstream), uTime))
            for percentile in percentiles:
                percentileValue = 0
                if sortedTimes:
                    percentileKey = int(percentile) * len(sortedTimes) / 100
                    # Indexes are clamped to the last sample: without this a single
                    # sample (ceil(0.8) == 1) or a 100th percentile would index past
                    # the end of the list.
                    if len(sortedTimes) % 2 > 0:
                        low = min(int(math.floor(percentileKey)), lastIndex)
                        high = min(int(math.ceil(percentileKey)), lastIndex)
                        percentileValue = (sortedTimes[low] + sortedTimes[high]) / 2
                    else:
                        percentileValue = sortedTimes[min(int(percentileKey), lastIndex)]
                print("us%s_times_percentile_%s.value %s"
                      % (sanitize(upstream), percentile, percentileValue))
            print()
342 6c765698 majesty
343
    # HTTP Status codes graph data: responses per second for each tracked code
    if "http" in graphs_enabled:
        orderedStatuses = sorted(httpStatusList.keys())
        for upstream in upstreams:
            fieldId = sanitize(upstream)
            httpCounters = upstreams[upstream]["http"]
            print()
            print("multigraph nginx_upstream_multi_%s.%s_statuses"
                  % (sanitize(siteName), fieldId))
            for status in orderedStatuses:
                if timeElapsed > 0:
                    value = httpCounters[status]["requests"] / timeElapsed
                else:
                    value = 0
                print("http%s_%s_status.value %s" % (status, fieldId, value))
            print()

    # Cache status graph data: responses per second for each cache state
    if "cache" in graphs_enabled:
        for upstream in upstreams:
            fieldId = sanitize(upstream)
            cacheCounters = upstreams[upstream]["cache"]
            print()
            print("multigraph nginx_upstream_multi_%s.%s_cache"
                  % (sanitize(siteName), fieldId))
            for status in cacheStatusList:
                if timeElapsed > 0:
                    value = cacheCounters[status] / timeElapsed
                else:
                    value = 0
                print("us%s_%s_cache.value %s" % (sanitize(status), fieldId, value))
            print()