root / plugins / nginx / nginx_upstream_multi_ @ a7139bca
Historique | Voir | Annoter | Télécharger (13,9 ko)
| 1 | 13bd1599 | Lars Kruse | #!/usr/bin/env python3 |
|---|---|---|---|
| 2 | 6c765698 | majesty | # |
| 3 | 5f9e882b | Lars Kruse | # Munin plugin to monitor requests number, cache statuses, http status codes and average request |
| 4 | # times of specified nginx upstreams. |
||
| 5 | 6c765698 | majesty | # |
| 6 | # Copyright Igor Borodikhin |
||
| 7 | # |
||
| 8 | # License : GPLv3 |
||
| 9 | # |
||
| 10 | # Configuration parameters: |
||
| 11 | 5f9e882b | Lars Kruse | # env.graphs - which graphs to produce (optional, list of graphs separated by spaces, default - |
| 12 | # cache http time request) |
||
| 13 | 6c765698 | majesty | # env.log - log file path (mandatory, ex.: /var/log/nginx/upstream.log) |
| 14 | 5f9e882b | Lars Kruse | # env.upstream - list of upstreams to monitor (mandatory, including port numbers separated by |
| 15 | # space, e.g.: 10.0.0.1:80 10.0.0.2:8080) |
||
| 16 | # env.statuses - list of http status codes to monitor (optional, default - all statuses, |
||
| 17 | # e.g.: 200 403 404 410 500 502) |
||
| 18 | # env.percentiles - which percentiles to draw on time graphs (optional, list of percentiles |
||
| 19 | # separated by spaces, default - 80) |
||
| 20 | 6c765698 | majesty | # |
| 21 | # ## Installation |
||
| 22 | 5f9e882b | Lars Kruse | # Copy file to directory /usr/share/munin/plugins/ and create symbolic link(s) for each log file |
| 23 | # you wish to monitor. |
||
| 24 | 6c765698 | majesty | # |
| 25 | # Specify log_format at /etc/nginx/conf.d/upstream.conf: |
||
| 26 | 5f9e882b | Lars Kruse | # log_format upstream "ua=[$upstream_addr] ut=[$upstream_response_time] us=[$upstream_status] \ |
| 27 | # cs=[$upstream_cache_status]" |
||
| 28 | 6c765698 | majesty | # |
| 29 | # Use it in your site configuration (/etc/nginx/sites-enabled/anything.conf): |
||
| 30 | # access_log /var/log/nginx/upstream.log upstream; |
||
| 31 | # |
||
| 32 | 5f9e882b | Lars Kruse | # Attention! Since the default user (nobody) does not have read permission for nginx log files we |
| 33 | # need to run it as root. |
||
| 34 | 6efaef76 | Igor Borodikhin | # |
| 35 | 2eb7552c | Igor Borodikhin | # And specify some options in /etc/munin/plugin-conf.d/munin-node: |
| 36 | 6c765698 | majesty | # |
| 37 | # [nginx_upstream_multi_upstream] |
||
| 38 | 6efaef76 | Igor Borodikhin | # user root |
| 39 | 6c765698 | majesty | # env.graphs cache http time request |
| 40 | # env.log /var/log/nginx/upstream.log |
||
| 41 | # env.upstream 10.0.0.1:80 10.0.0.2:8080 unix:/tmp/upstream3 |
||
| 42 | # env.statuses 200 403 404 410 500 502 |
||
| 43 | # env.percentiles 50 80 |
||
| 44 | # |
||
| 45 | 5f9e882b | Lars Kruse | # #%# family=contrib |
| 46 | 6c765698 | majesty | |
| 47 | 13bd1599 | Lars Kruse | import copy |
| 48 | import math |
||
| 49 | import os |
||
| 50 | import re |
||
| 51 | import sys |
||
| 52 | 734da6b9 | Lars Kruse | import time |
| 53 | 6c765698 | majesty | |
| 54 | 13bd1599 | Lars Kruse | |
# --- Plugin configuration -------------------------------------------------------------------
# Everything below is evaluated once at start-up from the process environment (as set by
# munin-node through plugin-conf.d) and from the name the plugin was invoked with.

# How we've been called (name of the symlink without its directory)
progName = os.path.basename(sys.argv[0])

# Where to store plugin state
stateDir = os.environ.get("MUNIN_PLUGSTATE")
if stateDir is None:
    # Without a state directory the read offset of the log cannot be stored. Fail with a
    # readable message instead of a TypeError raised by os.path.join() further down.
    sys.exit("MUNIN_PLUGSTATE is not set: run this plugin via munin-node or munin-run")

# Which site configuration we should use (suffix of the symlink name)
siteName = progName[len("nginx_upstream_multi_"):]

# Log path
logPath = os.environ.get("log", "/var/log/nginx/access.log")

# Http statuses list ("code:title" pairs separated by semicolons)
httpStatusString = (
    "100:Continue;101:Switching protocols;102:Processing;200:OK;201:Created;202:Accepted;"
    "203:Non-Authoritative Information;204:No content;205:Reset content;206:Partial content;"
    "207:Multi-status;226:IM used;300:Multiple choices;301:Moved permanently;"
    "302:Moved temporarily;303:See other;304:Not modified;305:Use proxy;307:Temporary redirect;"
    "400:Bad request;401:Unauthorized;402:Payment required;403:Forbidden;404:Not found;"
    "405:Method not allowed;406:Not acceptable;407:Proxy Authentication Required;"
    "408:Request timeout;409:Conflict;410:Gone;411:Length required;412:Precondition failed;"
    "413:Request entity too large;414:Request URI too large;415:Unsupported media type;"
    "416:Request range not satisfiable;417:Expectation failed;422:Unprocessable entity;"
    "423:Locked;424:Failed dependency;425:Unordered collection;426:Upgrade required;"
    "449:Retry with;456:Unrecoverable error;500:Internal server error;501:Not implemented;"
    "502:Bad gateway;503:Service unavailable;504:Gateway timeout;505:HTTP version not supported;"
    "506:Variant also negotiates;507:Insufficient storage;508:Loop detected;"
    "509:Bandwidth limit exceeded;510:Not extended")

# an empty list of wanted statuses is interpreted as: all statuses
statuses = os.environ.get("statuses", "").split()

# Per-status counter template: code -> {"title": ..., "requests": 0}
httpStatusList = {}
for statusString in httpStatusString.split(";"):
    [code, title] = statusString.split(":")
    if not statuses or code in statuses:
        httpStatusList[code] = {
            "title": title,
            "requests": 0
        }

# Per-cache-status counter template. "REVALIDATED" is appended last so that the order of the
# previously existing fields does not change.
cacheStatusList = {"MISS": 0, "BYPASS": 0, "EXPIRED": 0, "UPDATING": 0, "STALE": 0, "HIT": 0,
                   "REVALIDATED": 0}

# Parse upstreams: every monitored upstream gets its own, independent set of counters
upstreams = {}
if "upstream" in os.environ:
    upstreamString = os.environ["upstream"]
    upstreamList = upstreamString.split()
    for upstream in upstreamList:
        upstreams[upstream] = {
            "requests": 0,
            "time": 0,
            "times": [],
            "cache": copy.deepcopy(cacheStatusList),
            "http": copy.deepcopy(httpStatusList)
        }
else:
    raise Exception("No upstreams specified")

# Percentiles to draw on the time graphs (kept as strings, they are part of the field names)
percentiles = os.environ.get("percentiles", "80").split()

# Names of the graph groups to produce
graphs_enabled = os.environ.get("graphs", "cache http time request").split()

now = int(time.time())

# The state file stores the log offset reached by the previous run; its modification time
# doubles as the timestamp of that run. Without a state file the elapsed time is zero.
lastBytePath = os.path.join(stateDir, "nginx_upstream_multi_{}_lastByte.txt".format(siteName))
try:
    lastRun = os.path.getmtime(lastBytePath)
except OSError:
    lastRun = now
||
| 127 | |||
| 128 | |||
# Characters that must not appear in the Munin field and graph names built by this plugin.
_INVALID_NAME_CHARS = re.compile(r"[^A-Za-z0-9_]")


def sanitize(string):
    """Return *string* with every character outside ``[A-Za-z0-9_]`` replaced by ``_``.

    The result is embedded into Munin field and graph names. Besides the ".", ":", "/" and
    "-" found in host names, ports and unix socket paths, any other character (brackets,
    "@", non-ASCII letters, ...) is replaced as well, so a name never carries a character
    outside that set. The length of the string is preserved.
    """
    return _INVALID_NAME_CHARS.sub("_", string)
| 131 | |||
| 132 | 5f9e882b | Lars Kruse | |
def _graph_name(upstream, suffix):
    """Return the multigraph name of the sub-graph *suffix* belonging to *upstream*."""
    return "nginx_upstream_multi_%s.%s_%s" % (sanitize(siteName), sanitize(upstream), suffix)


def _rate(count, elapsed):
    """Return *count* per second of *elapsed*, or 0 if no time has elapsed."""
    if elapsed > 0:
        return count / elapsed
    return 0


def _split_values(raw):
    """Split one bracketed log field into its values.

    Values may be separated by commas, by " : " or by whitespace; all of these are treated
    alike.
    """
    return raw.replace(",", " ").replace(" : ", " ").split()


def _percentile(sortedTimes, percent):
    """Return the *percent* percentile of the ascending list *sortedTimes* (0 if empty).

    The position is ``percent * len / 100``. For an odd number of samples the two neighbouring
    samples are averaged, otherwise the sample at the truncated position is used. Positions are
    clamped to the valid index range, so small sample counts and a percentile of 100 do not
    run past the end of the list.
    """
    if not sortedTimes:
        return 0
    lastIndex = len(sortedTimes) - 1
    key = percent * len(sortedTimes) / 100
    if len(sortedTimes) % 2 > 0:
        low = max(0, min(int(math.floor(key)), lastIndex))
        high = max(0, min(int(math.ceil(key)), lastIndex))
        return (sortedTimes[low] + sortedTimes[high]) / 2
    return sortedTimes[max(0, min(int(key), lastIndex))]


# The parent graph and the per-upstream sub-graphs must share the same sanitized prefix,
# otherwise the sub-graphs are not nested below the parent.
parentGraph = "nginx_upstream_multi_%s" % sanitize(siteName)

if len(sys.argv) == 2 and sys.argv[1] == "config":
    # Parent graph declaration
    print("multigraph %s" % parentGraph)
    print("graph_title Requests number")
    print("graph_vlabel rps")
    print("graph_category webserver")
    for upstream in upstreams:
        print("us%s_requests.label %s" % (sanitize(upstream), upstream))

    # Requests graph declaration
    if "request" in graphs_enabled:
        for upstream in upstreams:
            print()
            print("multigraph %s" % _graph_name(upstream, "requests"))
            print("graph_title Requests number - %s" % upstream)
            print("graph_vlabel rps")
            print("graph_category webserver")
            print("us%s_requests.label %s" % (sanitize(upstream), upstream))
            print()

    # Times graph declaration
    if "time" in graphs_enabled:
        for upstream in upstreams:
            print()
            print("multigraph %s" % _graph_name(upstream, "times"))
            print("graph_title Request time - %s" % upstream)
            print("graph_vlabel sec.")
            print("graph_category webserver")
            print("us%s_times.label average" % (sanitize(upstream)))
            for percentile in percentiles:
                print("us%s_times_percentile_%s.label %s-percentile"
                      % (sanitize(upstream), percentile, percentile))
            print()

    # HTTP Status codes graph declaration
    if "http" in graphs_enabled:
        for upstream in upstreams:
            print()
            print("multigraph %s" % _graph_name(upstream, "statuses"))
            print("graph_title HTTP - %s" % upstream)
            print("graph_vlabel rps")
            print("graph_category webserver")
            for status in sorted(httpStatusList):
                print("http%s_%s_status.label %s - %s"
                      % (status, sanitize(upstream), status, httpStatusList[status]["title"]))
            print()

    # Cache status graph declaration
    if "cache" in graphs_enabled:
        for upstream in upstreams:
            print()
            print("multigraph %s" % _graph_name(upstream, "cache"))
            print("graph_title Cache - %s" % upstream)
            print("graph_vlabel rps")
            print("graph_category webserver")
            for status in cacheStatusList:
                print("us%s_%s_cache.label %s" % (sanitize(status), sanitize(upstream), status))
            print()
else:
    # Seconds since the previous run; all counters below are converted to per-second rates
    timeElapsed = now - lastRun

    # Offset in the log file reached by the previous run (0 if unknown or unreadable)
    try:
        with open(lastBytePath, "r") as lastByteHandle:
            lastByte = int(lastByteHandle.read())
    except (OSError, ValueError):
        lastByte = 0

    regExp = re.compile(r"ua=\[(.*?)\]\s+ut=\[(.*?)\]\s+us=\[(.*?)\]\s+cs=\[(.*?)\]")

    # The log is read in binary mode: offsets are then plain byte positions that can be
    # compared with the file size, and undecodable bytes cannot abort the run.
    try:
        logHandle = open(logPath, "rb")
    except OSError as e:
        print("Log file %s not readable: %s" % (logPath, e.strerror), file=sys.stderr)
        sys.exit(1)

    with logHandle:
        logSize = os.fstat(logHandle.fileno()).st_size

        # A file shorter than the stored offset was rotated or truncated: start over
        if lastByte < 0 or logSize < lastByte:
            lastByte = 0

        logHandle.seek(lastByte)
        for rawLine in logHandle:
            match = regExp.search(rawLine.decode("utf-8", "replace"))
            if not match:
                continue

            # One request may have been passed to several upstreams in turn; the fields
            # then hold one value per attempt, in the same order.
            addresses, times, statusCodes, caches = (
                _split_values(field) for field in match.groups())

            for index, uAddress in enumerate(addresses):
                stats = upstreams.get(uAddress)
                if stats is None:
                    # not one of the monitored upstreams (or "-")
                    continue

                stats["requests"] += 1

                # A missing or non-numeric time ("-") is left out of the time statistics
                try:
                    uTime = float(times[index])
                except (IndexError, ValueError):
                    uTime = None
                if uTime is not None:
                    stats["time"] += uTime
                    stats["times"].append(uTime)

                # Only statuses that are being monitored have a counter; "-" never has one
                uStatus = statusCodes[index] if index < len(statusCodes) else "-"
                if uStatus in stats["http"]:
                    stats["http"][uStatus]["requests"] += 1

                uCache = caches[index] if index < len(caches) else "-"
                if uCache in stats["cache"]:
                    stats["cache"][uCache] += 1

        newOffset = logHandle.tell()

    # Remember how far the log has been processed
    try:
        with open(lastBytePath, "w") as lastByteHandle:
            lastByteHandle.write(str(newOffset))
    except OSError as e:
        print("Failed to write status file (%s): %s" % (lastBytePath, e.strerror),
              file=sys.stderr)
        sys.exit(1)

    # Parent graph data
    print("multigraph %s" % parentGraph)
    for upstream in upstreams:
        value = _rate(upstreams[upstream]["requests"], timeElapsed)
        print("us%s_requests.value %s" % (sanitize(upstream), value))

    # Requests graph data
    if "request" in graphs_enabled:
        for upstream in upstreams:
            print()
            print("multigraph %s" % _graph_name(upstream, "requests"))
            value = _rate(upstreams[upstream]["requests"], timeElapsed)
            print("us%s_requests.value %s" % (sanitize(upstream), value))
            print()

    # Times graph data
    if "time" in graphs_enabled:
        for upstream in upstreams:
            measured = upstreams[upstream]["times"]
            measured.sort()
            # Average over the requests that actually reported a time
            uTime = upstreams[upstream]["time"] / len(measured) if measured else 0
            print()
            print("multigraph %s" % _graph_name(upstream, "times"))
            print("us%s_times.value %s" % (sanitize(upstream), uTime))
            for percentile in percentiles:
                percentileValue = _percentile(measured, int(percentile))
                print("us%s_times_percentile_%s.value %s"
                      % (sanitize(upstream), percentile, percentileValue))
            print()

    # HTTP Status codes graph data
    if "http" in graphs_enabled:
        for upstream in upstreams:
            print()
            print("multigraph %s" % _graph_name(upstream, "statuses"))
            for status in sorted(httpStatusList):
                value = _rate(upstreams[upstream]["http"][status]["requests"], timeElapsed)
                print("http%s_%s_status.value %s" % (status, sanitize(upstream), value))
            print()

    # Cache status graph data
    if "cache" in graphs_enabled:
        for upstream in upstreams:
            print()
            print("multigraph %s" % _graph_name(upstream, "cache"))
            for status in cacheStatusList:
                value = _rate(upstreams[upstream]["cache"][status], timeElapsed)
                print("us%s_%s_cache.value %s" % (sanitize(status), sanitize(upstream), value))
            print()
